Repository: egenn/rtemis Branch: main Commit: ee040e1ef75f Files: 296 Total size: 1.4 MB Directory structure: gitextract_jpidflln/ ├── .Rbuildignore ├── .github/ │ ├── .gitignore │ ├── CONTRIBUTING.md │ └── workflows/ │ └── R-CMD-check.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE.md ├── Makefile ├── NAMESPACE ├── NEWS.md ├── R/ │ ├── 00_S7init.R │ ├── 01_ExecutionConfig.R │ ├── 02_Hyperparameters.R │ ├── 03_Metrics.R │ ├── 04_Preprocessor.R │ ├── 05_Resampler.R │ ├── 06_Tuner.R │ ├── 07_Supervised.R │ ├── 08_MassUni.R │ ├── 09_ClusteringConfig.R │ ├── 10_Clustering.R │ ├── 11_DecompositionConfig.R │ ├── 12_Decomposition.R │ ├── 13_Themes.R │ ├── 14_SuperConfig.R │ ├── 15_CheckData.R │ ├── 16_S7utils.R │ ├── algorithmDB.R │ ├── calibrate.R │ ├── check_data.R │ ├── check_input_data.R │ ├── cluster.R │ ├── cluster_CMeans.R │ ├── cluster_DBSCAN.R │ ├── cluster_flexclust.R │ ├── data_xt_example.R │ ├── ddSci.R │ ├── ddb.R │ ├── decomp.R │ ├── decomp_ICA.R │ ├── decomp_Isomap.R │ ├── decomp_NMF.R │ ├── decomp_PCA.R │ ├── decomp_UMAP.R │ ├── decomp_tSNE.R │ ├── draw_3Dscatter.R │ ├── draw_bar.R │ ├── draw_box.R │ ├── draw_calibration.R │ ├── draw_confusion.R │ ├── draw_dist.R │ ├── draw_graphd3.R │ ├── draw_graphjs.R │ ├── draw_heatmap.R │ ├── draw_leaflet.R │ ├── draw_pie.R │ ├── draw_protein.R │ ├── draw_pvals.R │ ├── draw_roc.R │ ├── draw_scatter.R │ ├── draw_spectrogram.R │ ├── draw_survfit.R │ ├── draw_table.R │ ├── draw_ts.R │ ├── draw_varimp.R │ ├── draw_volcano.R │ ├── draw_xt.R │ ├── fmt.R │ ├── ifw.R │ ├── massGLM.R │ ├── metrics.R │ ├── msg.R │ ├── preprocess.R │ ├── present.R │ ├── read.R │ ├── resample.R │ ├── rtemis-package.R │ ├── rtemis_color_system.R │ ├── theme.R │ ├── train.R │ ├── train_CART.R │ ├── train_GAM.R │ ├── train_GLM.R │ ├── train_GLMNET.R │ ├── train_Isotonic.R │ ├── train_LightCART.R │ ├── train_LightGBM.R │ ├── train_LightRF.R │ ├── train_LightRuleFit.R │ ├── train_Ranger.R │ ├── train_SVM.R │ ├── train_TabNet.R │ ├── tune.R │ ├── tune_GridSearch.R │ ├── utils.R │ ├── utils_art.R │ ├── utils_async.R │ ├── utils_checks.R │ ├── utils_color.R │ ├── utils_data.R │ ├── utils_data.table.R │ ├── utils_date.R │ ├── utils_df.R │ ├── utils_exec.R │ ├── utils_files.R │ ├── utils_html.R │ ├── utils_io.R │ ├── utils_lightgbm.R │ ├── utils_palettes.R │ ├── utils_plot.R │ ├── utils_plotly.R │ ├── utils_print.R │ ├── utils_rt.R │ ├── utils_rules.R │ ├── utils_strings.R │ ├── utils_supervised.R │ ├── utils_uniprot.R │ ├── utils_xt.R │ └── zzz.R ├── README.md ├── data/ │ └── xt_example.rda ├── data-raw/ │ └── create_xt_example.R ├── inst/ │ ├── CITATION │ ├── extdata/ │ │ ├── us-counties.rds │ │ └── us-states.rds │ └── resources/ │ ├── aminoacids.rds │ ├── rtemis.utf8 │ └── rtemis2.utf8 ├── man/ │ ├── available_algorithms.Rd │ ├── available_draw.Rd │ ├── available_themes.Rd │ ├── calibrate.Rd │ ├── check_data.Rd │ ├── choose_theme.Rd │ ├── class_imbalance.Rd │ ├── classification_metrics.Rd │ ├── clean_colnames.Rd │ ├── clean_names.Rd │ ├── cluster.Rd │ ├── col2grayscale.Rd │ ├── color_adjust.Rd │ ├── ddSci.Rd │ ├── ddb_collect.Rd │ ├── ddb_data.Rd │ ├── decomp.Rd │ ├── describe.Rd │ ├── df_movecolumn.Rd │ ├── df_nunique_perfeat.Rd │ ├── dot-list_to_Hyperparameters.Rd │ ├── dot-list_to_ResamplerConfig.Rd │ ├── dot-list_to_TunerConfig.Rd │ ├── draw_3Dscatter.Rd │ ├── draw_bar.Rd │ ├── draw_box.Rd │ ├── draw_calibration.Rd │ ├── draw_confusion.Rd │ ├── draw_dist.Rd │ ├── draw_fit.Rd │ ├── draw_graphD3.Rd │ ├── draw_graphjs.Rd │ ├── draw_heatmap.Rd │ ├── draw_leaflet.Rd │ ├── draw_pie.Rd │ ├── draw_protein.Rd │ ├── draw_pvals.Rd │ ├── draw_roc.Rd │ ├── draw_scatter.Rd │ ├── draw_spectrogram.Rd │ ├── draw_survfit.Rd │ ├── draw_table.Rd │ ├── draw_ts.Rd │ ├── draw_varimp.Rd │ ├── draw_volcano.Rd │ ├── draw_xt.Rd │ ├── dt_describe.Rd │ ├── dt_inspect_types.Rd │ ├── dt_keybin_reshape.Rd │ ├── dt_merge.Rd │ ├── dt_names_by_attr.Rd │ ├── dt_nunique_perfeat.Rd │ ├── dt_pctmatch.Rd │ ├── dt_pctmissing.Rd │ ├── dt_set_autotypes.Rd │ ├── dt_set_clean_all.Rd │ ├── dt_set_cleanfactorlevels.Rd │ ├── dt_set_logical2factor.Rd │ ├── dt_set_one_hot.Rd │ ├── exc.Rd │ ├── feature_matrix.Rd │ ├── feature_names.Rd │ ├── features.Rd │ ├── get_factor_names.Rd │ ├── get_mode.Rd │ ├── get_msg_sink.Rd │ ├── get_palette.Rd │ ├── getnames.Rd │ ├── getnamesandtypes.Rd │ ├── grapes-BC-grapes.Rd │ ├── inc.Rd │ ├── index_col_by_attr.Rd │ ├── init_project_dir.Rd │ ├── inspect.Rd │ ├── inspect_type.Rd │ ├── is_constant.Rd │ ├── labelify.Rd │ ├── massGLM.Rd │ ├── matchcases.Rd │ ├── mgetnames.Rd │ ├── names_by_class.Rd │ ├── one_hot2factor.Rd │ ├── outcome.Rd │ ├── outcome_name.Rd │ ├── plot.MassGLM.Rd │ ├── plot_manhattan.Rd │ ├── plot_roc.Rd │ ├── plot_true_pred.Rd │ ├── plot_varimp.Rd │ ├── preprocess.Rd │ ├── preprocessed.Rd │ ├── present.Rd │ ├── previewcolor.Rd │ ├── read.Rd │ ├── read_config.Rd │ ├── regression_metrics.Rd │ ├── resample.Rd │ ├── rnormmat.Rd │ ├── rtemis-package.Rd │ ├── rtemis_colors.Rd │ ├── rtversion.Rd │ ├── runifmat.Rd │ ├── set_msg_sink.Rd │ ├── set_outcome.Rd │ ├── setdiffsym.Rd │ ├── setup_CART.Rd │ ├── setup_CMeans.Rd │ ├── setup_DBSCAN.Rd │ ├── setup_ExecutionConfig.Rd │ ├── setup_GAM.Rd │ ├── setup_GLM.Rd │ ├── setup_GLMNET.Rd │ ├── setup_GridSearch.Rd │ ├── setup_HardCL.Rd │ ├── setup_ICA.Rd │ ├── setup_Isomap.Rd │ ├── setup_Isotonic.Rd │ ├── setup_KMeans.Rd │ ├── setup_LightCART.Rd │ ├── setup_LightGBM.Rd │ ├── setup_LightRF.Rd │ ├── setup_LightRuleFit.Rd │ ├── setup_LinearSVM.Rd │ ├── setup_NMF.Rd │ ├── setup_NeuralGas.Rd │ ├── setup_PCA.Rd │ ├── setup_Preprocessor.Rd │ ├── setup_RadialSVM.Rd │ ├── setup_Ranger.Rd │ ├── setup_Resampler.Rd │ ├── setup_SuperConfig.Rd │ ├── setup_SuperConfigLive.Rd │ ├── setup_TabNet.Rd │ ├── setup_UMAP.Rd │ ├── setup_tSNE.Rd │ ├── size.Rd │ ├── table_column_attr.Rd │ ├── theme.Rd │ ├── to_json.Rd │ ├── train.Rd │ ├── uniprot_get.Rd │ ├── with_msg_sink.Rd │ ├── write_toml.Rd │ ├── xt_example.Rd │ └── xtdescribe.Rd └── tests/ ├── testthat/ │ ├── test_Calibration.R │ ├── test_CheckData.R │ ├── test_Clustering.R │ ├── test_Decomposition.R │ ├── test_ExecutionConfig.R │ ├── test_Hyperparameters.R │ ├── test_Metrics.R │ ├── test_Preprocessor.R │ ├── test_Resampler.R │ ├── test_SuperConfig.R │ ├── test_SuperConfigLive.R │ ├── test_Supervised.R │ ├── test_Theme.R │ ├── test_Tuner.R │ ├── test_checks.R │ ├── test_colorsystem.R │ ├── test_draw.R │ ├── test_idx.R │ ├── test_massGLM.R │ ├── test_msg_sink.R │ ├── test_strings.R │ └── test_to_json.R └── testthat.R ================================================ FILE CONTENTS ================================================ ================================================ FILE: .Rbuildignore ================================================ ^__dev$ ^__validation$ ^_pkgdown\.yml$ ^.*\.code-workspace$ ^.*\.Rcheck$ ^.*\.tar.gz$ ^[.]?air[.]toml$ ^\.claude$ ^\.DS_Store$ ^\.gemini$ ^\.github$ ^\.lintr$ ^\.rtms-instructions\.md$ ^\.vscode$ ^cran-comments\.md$ ^LICENSE\.md$ ^data-raw$ ^dev$ ^docs$ ^specs$ ^AGENTS\.md$ ^NEWS\.md$ ^pkgdown$ ^Makefile$ ^SKILL\.md$ ================================================ FILE: .github/.gitignore ================================================ *.html ================================================ FILE: .github/CONTRIBUTING.md ================================================ # Contributing to rtemis Thank you for your interest in contributing to **rtemis**! This guide will help you report issues effectively. ## Before Opening an Issue ### Update to Latest Version Ensure you're using the latest version of rtemis (v0.99+). Many issues may already be fixed in recent updates. ```r # Install from CRAN install.packages("rtemis") # Install from GitHub pak::pak("rtemis-org/rtemis") # Install from r-universe install.packages('rtemis', repos = 'https://rtemis-org.r-universe.dev') # Check your version packageVersion("rtemis") ``` ### Check Existing Issues Please search [existing issues](https://github.com/rtemis-org/rtemis/issues) to see if your problem or suggestion has already been reported. If you find a related issue, add a comment with any additional information. ### Review Documentation - **API Documentation**: https://docs.rtemis.org/r/ml-api/ - **General Documentation**: https://docs.rtemis.org/r/ml ## Opening an Issue ### Issue Types We welcome the following types of issues: 1. **🐛 Bug Reports**: Unexpected behavior, errors, or crashes. (Use `[BUG]` in the title) 2. **✨ Feature Requests**: Ideas for new functionality. (Use `[FEATURE]` in the title) 3. **📚 Documentation**: Improvements to docs or examples. (Use `[DOC]` in the title) 4. **❓ Questions**: Use [Discussions](https://github.com/rtemis-org/rtemis/discussions) for usage questions ### Bug Reports A good bug report should include: #### Required Information 1. **rtemis version**: Output of `packageVersion("rtemis")` 2. **R version**: Output of `R.version.string` 3. **Operating System**: e.g., macOS 14.5, Ubuntu 22.04, Windows 11 4. **Clear description**: What did you expect vs. what actually happened? #### Reproducible Example **Critical**: Provide a minimal reproducible example. Use the template below: ```r # Load required packages library(rtemis) library(data.table) # if needed # Create minimal data set.seed(2025) n <- 100 x <- rnormmat(n, 3) y <- x[, 1] + x[, 2] + rnorm(n) dat <- data.frame(x, y) # Demonstrate the issue mod <- train( x = dat, algorithm = "glm" ) # Expected: Model trains successfully # Actual: Error message... ``` #### Error Messages Include **complete error messages** with full stack traces. If the error is verbose, use a code block: ``` Error in train(...): ! You must define either `hyperparameters` or `algorithm`. ``` #### Session Info (for complex issues) For crashes or environment-specific issues, include: ```r sessionInfo() ``` ### Feature Requests For feature requests, please describe: 1. **Use case**: What problem would this solve? 2. **Proposed solution**: How should it work? 3. **Alternatives considered**: What workarounds exist currently? 4. **Impact**: Who would benefit from this feature? **Example:** > **Use case**: I frequently need to train models with time-series cross-validation but the current resampling methods don't preserve temporal order. > > **Proposed solution**: Add `setup_TimeSeriesCV()` that creates train/test splits respecting time ordering. > > **Alternatives**: Currently using custom resampling with `outer_resampling` parameter, but it's verbose and error-prone. ### Documentation Issues For documentation improvements: 1. **Location**: Specify which page or function (e.g., `?train`, `?setup_GLMNET`) 2. **Problem**: What's unclear, incorrect, or missing? 3. **Suggestion**: How could it be improved? ## Version-Specific Notes ### rtemis 0.99+ vs. rtemisalpha (Legacy) **Important**: This repository contains **rtemis 0.99+**, a complete rewrite using S7 classes. If you're using the legacy version (`rtemisalpha`), please note: - Legacy issues should reference [rtemis-legacy](https://github.com/rtemis-org/rtemis-legacy) (unmaintained) - Migration questions are welcome here - API differences are expected (see README.md for major changes) ### Active Development rtemis 0.99+ is under active development. Features may change between releases. When reporting issues: - Specify your branch if not using `main` (check with `git branch`) - Note if the issue appears in a specific algorithm (some are being ported from the legacy version) ## What Happens Next? 1. **Triage**: Maintainers will review and label your issue 2. **Discussion**: We may ask for clarification or additional details 3. **Resolution**: - **Bugs**: Fixed in upcoming releases, referenced in commit messages - **Features**: Evaluated for inclusion in roadmap - **Questions**: Answered or redirected to appropriate resources ## Code of Conduct Be respectful and constructive. We're all here to improve rtemis together. ## Pull Requests While this guide focuses on issues, pull requests are welcome! Key points: - Discuss major changes in an issue first - Follow existing code style (S7 classes, roxygen2 documentation) - All `@param` must follow format: `Class: Description ending with period.` - Include tests for new functionality - Update documentation as needed ## Questions? - **General usage**: [GitHub Discussions](https://github.com/rtemis-org/rtemis/discussions) - **Bug reports/features**: [GitHub Issues](https://github.com/rtemis-org/rtemis/issues) - **Security issues**: Contact maintainers directly (see DESCRIPTION file) --- Thank you for contributing to rtemis. ================================================ FILE: .github/workflows/R-CMD-check.yaml ================================================ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: pull_request: push: branches: [main] workflow_dispatch: name: R-CMD-check permissions: read-all concurrency: group: R-CMD-check-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: R-CMD-check: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 with: use-public-rspm: true - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck needs: check - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")' error-on: '"note"' ================================================ FILE: .gitignore ================================================ # Dev __dev/ __validation/ dev/ __out/ specs/ # Mac OS .DS_Store # VS Code .vscode *.code-workspace # R History files .Rhistory .Rapp.history # Output files from R CMD build /*.tar.gz # Output files from R CMD check /*.Rcheck/ # pkgdown _pkgdown.yml pkgdown/ # Air air.toml # produced vignettes vignettes/*.html vignettes/*.pdf # Temporary files created by R markdown *.utf8.md *.knit.md # lintr .lintr # CRAN cran-comments.md # Manual *.pdf # Assistants AGENTS.md .claude/ SKILL.md ================================================ FILE: DESCRIPTION ================================================ Package: rtemis Version: 1.2.0 Title: Machine Learning and Visualization Date: 2026-05-12 Authors@R: person(given = "E.D.", family = "Gennatas", role = c("aut", "cre", "cph"), email = "gennatas@gmail.com", comment = c(ORCID = "0000-0001-9280-3609")) Description: Machine learning and visualization package with an 'S7' backend featuring comprehensive type checking and validation, paired with an efficient functional user-facing API. train(), cluster(), and decomp() provide one-call access to supervised and unsupervised learning. All configuration steps are performed using setup functions and validated. A single call to train() handles preprocessing, hyperparameter tuning, and testing with nested resampling. Supports 'data.frame', 'data.table', and 'tibble' inputs, parallel execution, and interactive visualizations. The package first appeared in E.D. Gennatas (2017) . License: GPL (>= 3) URL: https://www.rtemis.org, https://docs.rtemis.org/r/ml, https://docs.rtemis.org/r/ml-api/ BugReports: https://github.com/rtemis-org/rtemis/issues ByteCompile: yes Depends: R (>= 4.1.0) Imports: grDevices, graphics, stats, methods, utils, S7, data.table, future, htmltools, cli Suggests: arrow, bit64, car, colorspace, DBI, dbscan, dendextend (>= 0.18.0), duckdb, e1071, farff, fastICA, flexclust, future.apply, future.mirai, futurize, geosphere, ggplot2, glmnet, geojsonio, glue, grid, gsubfn, haven, heatmaply, htmlwidgets, igraph, jsonlite, later, leaflet, leaps, lightAUC, lightgbm, matrixStats, mgcv, mice, mirai, missRanger, nanonext, networkD3, NMF, openxlsx, parallelly, partykit, plotly, plumber, pROC, progressr, psych, pvclust, ranger, reactable, readxl, reticulate, ROCR, rpart, Rtsne, seqinr, sf, shapr, survival, tabnet, threejs, testthat (>= 3.0.0), tibble, timeDate, toml, torch, uwot, vegan, vroom, withr Encoding: UTF-8 Config/testthat/edition: 3 Roxygen: list(markdown = TRUE) LazyData: true Config/roxygen2/version: 8.0.0 ================================================ FILE: LICENSE.md ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . ================================================ FILE: Makefile ================================================ PKG := $(shell awk '/^Package:/{print $$2; exit}' DESCRIPTION) R ?= R RSCRIPT ?= Rscript CHECK_DIR := $(PKG).Rcheck TARBALL_GLOB := $(PKG)_*.tar.gz msg = @printf '\033[38;2;108;163;160m[%s] %s\033[0m\n' "$$(date -u '+%Y-%m-%d %H:%M:%SZ')" "$(1)" .DEFAULT_GOAL := help .PHONY: help format document install test build check check-cran check-cran-no-tests site clean help: $(call msg,Available targets:) @printf '%s\n' \ ' format Format R code with air CLI (if available)' \ ' document Generate roxygen2 documentation' \ ' install Document and install the package locally with pak' \ ' test Run testthat::test_local(stop_on_failure = TRUE)' \ ' build Build the source tarball' \ ' check Run R CMD check on the built tarball' \ ' check-cran Run R CMD check --as-cran' \ ' check-cran-no-tests Run R CMD check --as-cran --no-tests' \ ' manual Build package manual' \ ' site Build pkgdown site' \ ' clean Remove tarballs and .Rcheck output' format: $(call msg,─── Formatting $(PKG) package... ───) @if command -v air >/dev/null 2>&1; then \ air format .; \ else \ echo " Note: 'air' CLI not found — skipping R code formatting."; \ fi $(call msg,Done) document: format $(call msg,─── Documenting $(PKG) package... ───) $(RSCRIPT) -e "roxygen2::roxygenize()" $(call msg,Done) install: document $(call msg,─── Installing $(PKG) package... ───) $(RSCRIPT) -e "pak::local_install(upgrade = TRUE)" $(call msg,Done) test: $(call msg,─── Running testthat tests for $(PKG)... ───) $(RSCRIPT) -e "testthat::test_local(stop_on_failure = TRUE)" $(call msg,Done) build: clean $(call msg,─── Building $(PKG) package... ───) $(R) CMD build . $(call msg,Done) check: build $(call msg,─── Running R CMD check on $(PKG)... ───) $(R) CMD check $(TARBALL_GLOB) rm -f $(TARBALL_GLOB) $(call msg,Done) check-cran: build $(call msg,─── Running R CMD check --as-cran on $(PKG)... ───) $(R) CMD check $(TARBALL_GLOB) --as-cran rm -f $(TARBALL_GLOB) $(call msg,Done) check-cran-no-tests: build $(call msg,─── Running R CMD check --as-cran on $(PKG)... ───) $(R) CMD check $(TARBALL_GLOB) --as-cran --no-tests rm -f $(TARBALL_GLOB) $(call msg,Done) manual: $(call msg,─── Building manual for $(PKG)... ───) $(R) CMD Rd2pdf . --output=$(PKG).pdf $(call msg,Done) site: $(call msg,─── Building pkgdown site for $(PKG)... ───) $(RSCRIPT) -e "pkgdown::build_site()" $(call msg,Done) clean: $(call msg,─── Cleaning build artifacts... ───) rm -rf $(CHECK_DIR) rm -f $(TARBALL_GLOB) $(call msg,Done) ================================================ FILE: NAMESPACE ================================================ # Generated by roxygen2: do not edit by hand S3method(plot,MassGLM) export("%BC%") export(.list_to_Hyperparameters) export(.list_to_ResamplerConfig) export(.list_to_TunerConfig) export(available_clustering) export(available_decomposition) export(available_draw) export(available_supervised) export(available_themes) export(calibrate) export(check_data) export(choose_theme) export(class_imbalance) export(classification_metrics) export(clean_colnames) export(clean_names) export(cluster) export(col2grayscale) export(color_adjust) export(ddSci) export(ddb_collect) export(ddb_data) export(decomp) export(describe) export(df_movecolumn) export(df_nunique_perfeat) export(draw_3Dscatter) export(draw_bar) export(draw_box) export(draw_calibration) export(draw_confusion) export(draw_dist) export(draw_fit) export(draw_graphD3) export(draw_graphjs) export(draw_heatmap) export(draw_leaflet) export(draw_pie) export(draw_protein) export(draw_pvals) export(draw_roc) export(draw_scatter) export(draw_spectrogram) export(draw_survfit) export(draw_table) export(draw_ts) export(draw_varimp) export(draw_volcano) export(draw_xt) export(dt_describe) export(dt_inspect_types) export(dt_keybin_reshape) export(dt_merge) export(dt_names_by_attr) export(dt_nunique_perfeat) export(dt_pctmatch) export(dt_pctmissing) export(dt_set_autotypes) export(dt_set_clean_all) export(dt_set_cleanfactorlevels) export(dt_set_logical2factor) export(dt_set_one_hot) export(exc) export(feature_matrix) export(feature_names) export(features) export(get_factor_names) export(get_mode) export(get_msg_sink) export(get_palette) export(getcharacternames) export(getdatenames) export(getfactornames) export(getlogicalnames) export(getnames) export(getnamesandtypes) export(getnumericnames) export(inc) export(index_col_by_attr) export(init_project_dir) export(inspect) export(inspect_type) export(is_constant) export(labelify) export(massGLM) export(matchcases) export(mgetnames) export(names_by_class) export(one_hot2factor) export(outcome) export(outcome_name) export(plot_manhattan) export(plot_manhattan.MassGLM) export(plot_roc) export(plot_true_pred) export(plot_varimp) export(preprocess) export(preprocess.class_tabular.Preprocessor) export(preprocess.class_tabular.PreprocessorConfig) export(preprocessed) export(present) export(previewcolor) export(read) export(read_config) export(regression_metrics) export(resample) export(rnormmat) export(rtemis_colors) export(rtversion) export(runifmat) export(set_msg_sink) export(set_outcome) export(setdiffsym) export(setup_CART) export(setup_CMeans) export(setup_DBSCAN) export(setup_ExecutionConfig) export(setup_GAM) export(setup_GLM) export(setup_GLMNET) export(setup_GridSearch) export(setup_HardCL) export(setup_ICA) export(setup_Isomap) export(setup_Isotonic) export(setup_KMeans) export(setup_LightCART) export(setup_LightGBM) export(setup_LightRF) export(setup_LightRuleFit) export(setup_LinearSVM) export(setup_NMF) export(setup_NeuralGas) export(setup_PCA) export(setup_Preprocessor) export(setup_RadialSVM) export(setup_Ranger) export(setup_Resampler) export(setup_SuperConfig) export(setup_SuperConfigLive) export(setup_TabNet) export(setup_UMAP) export(setup_tSNE) export(size) export(table_column_attr) export(theme_black) export(theme_blackgrid) export(theme_blackigrid) export(theme_darkgray) export(theme_darkgraygrid) export(theme_darkgrayigrid) export(theme_lightgraygrid) export(theme_mediumgraygrid) export(theme_white) export(theme_whitegrid) export(theme_whiteigrid) export(to_json) export(train) export(uniprot_get) export(with_msg_sink) export(write_toml) export(xtdescribe) import(S7) import(data.table) import(grDevices) import(graphics) import(htmltools) import(methods) import(stats) importFrom(utils,getFromNamespace) importFrom(utils,head) importFrom(utils,packageVersion) importFrom(utils,sessionInfo) importFrom(utils,tail) ================================================ FILE: NEWS.md ================================================ # rtemis news ## 1.0.0 First CRAN release ## 1.0.1 - Introduce `VariableImportance` S7 class to represent variable importance data, allowing for more than one measure of importance per model and update all relevant classes and methods. - Calculate Partial_Effect_Variance as variable importance measure for GAM models - Add `execution_config` argument to internal `train_` method and use it in LightRuleFit to propagate to LightGBM and GLMNET calls. ================================================ FILE: R/00_S7init.R ================================================ # S7_init.R # ::rtemis:: # 2025- EDG rtemis.org # References # S7 generics: https://rconsortium.github.io/S7/articles/generics-methods.html # %% --- S3 Classes for S7 ---------------------------------------------------------------------------- class_data.table <- new_S3_class("data.table") class_lgb.Booster <- new_S3_class("lgb.Booster") # All internal methods should support data.frame, data.table, tbl_df class_tabular <- new_union(class_data.frame, class_data.table) # Supervised learning model classes class_glm <- new_S3_class("glm") class_gam <- new_S3_class("gam") class_glmnet <- new_S3_class("glmnet") class_cv.glmnet <- new_S3_class("cv.glmnet") class_stepfun <- new_S3_class("stepfun") # Isotonic regression class_rpart <- new_S3_class("rpart") class_ranger <- new_S3_class("ranger") class_svm <- new_S3_class("svm") class_tabnet_fit <- new_S3_class("tabnet_fit") # %% --- Generics ------------------------------------------------------------------------------------- # %% repr ---- #' String representation #' #' @param x rtemis object. #' #' @return Character string representation of the object. #' #' @author EDG #' @keywords internal #' @noRd repr <- new_generic("repr", "x") # %% inspect ---- #' Inspect rtemis object #' #' @param x R object to inspect. #' #' @return Called for side effect of printing information to console; returns character string #' invisibly. #' #' @author EDG #' @export #' #' @examples #' inspect(iris) inspect <- new_generic("inspect", "x", function(x) { S7_dispatch() }) # /rtemis::inspect # %% preprocess ---- #' @name #' preprocess #' #' @title #' Preprocess Data #' #' @description #' Preprocess data for analysis and visualization. #' #' @details #' Methods are provided for preprocessing training set data, which accepts a `PreprocessorConfig` #' object, and for preprocessing validation and test set data, which accept a `Preprocessor` #' object. #' #' @return `Preprocessor` object. #' #' @author EDG #' @rdname preprocess #' @export #' #' @examples #' # Setup a `Preprocessor`: this outputs a `PreprocessorConfig` object. #' prp <- setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE) #' #' # Includes a long list of parameters #' prp #' #' # Resample iris to get train and test data #' res <- resample(iris, setup_Resampler(seed = 2026)) #' iris_train <- iris[res[[1]], ] #' iris_test <- iris[-res[[1]], ] #' #' # Preprocess training data #' iris_pre <- preprocess(iris_train, prp) #' #' # Access preprocessd training data with `preprocessed()` #' preprocessed(iris_pre) #' #' # Apply the same preprocessing to test data #' # In this case, the scale and center values from training data will be used. #' # Note how `preprocess()` accepts either a `PreprocessorConfig` or `Preprocessor` object for #' # this reason. #' iris_test_pre <- preprocess(iris_test, iris_pre) #' #' # Access preprocessed test data #' preprocessed(iris_test_pre) preprocess <- new_generic("preprocess", c("x", "config")) # %% train_ ---- #' Generic for training supervised learning models #' #' @description #' Internal S7 generic that dispatches algorithm-specific training based on #' `Hyperparameters` class. Called by `train()`. #' #' @param hyperparameters `Hyperparameters` object: Algorithm-specific hyperparameters. #' @param x tabular data: Training set. #' @param weights Optional Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Validation set for algorithms that support early stopping. #' @param verbosity Integer: Verbosity level. #' #' @return Algorithm-specific fitted model object. #' #' @author EDG #' @keywords internal #' @noRd train_ <- new_generic( "train_", "hyperparameters", function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { S7_dispatch() } ) # /rtemis::train_ # %% predict_super ---- #' Predict from supervised learning model (internal) #' #' @description #' Internal S7 generic that dispatches algorithm-specific prediction based on #' model class. #' #' @param model Fitted model object. #' @param newdata tabular data: New data for prediction. #' @param type Character: Type of supervised learning ("Classification" or "Regression"). #' @param ... Additional arguments (not currently used). #' #' @return Predictions (class probabilities for classification, numeric for regression). #' #' @author EDG #' @keywords internal #' @noRd predict_super <- new_generic( "predict_super", "model", function(model, newdata, type = NULL, verbosity = 0L) { S7_dispatch() } ) # /rtemis::predict_super # %% varimp_super ---- #' Get variable importance (internal) #' #' @description #' Internal S7 generic that dispatches algorithm-specific variable importance #' extraction based on model class. #' #' @param object Fitted model object. #' #' @return Numeric vector of variable importance scores (named by feature). #' #' @author EDG #' @keywords internal #' @noRd varimp_super <- new_generic( "varimp_super", "model", function(model, ...) { S7_dispatch() } ) # /rtemis::varimp_super # %% se_super ---- #' Get standard errors of predictions (internal) #' #' @description #' Internal S7 generic for extracting standard errors from regression models. #' #' @param object Fitted model object. #' @param newdata tabular data: New data for prediction. #' #' @return Numeric vector of standard errors. #' #' @author EDG #' @keywords internal #' @noRd se_super <- new_generic( "se_super", "model", function(model, newdata) { S7_dispatch() } ) # %% se ---- # Standard error of the fit. se <- new_generic("se", "x") # %% decomp_ ---- #' Generic for decomposition #' #' @author EDG #' @keywords internal #' @noRd decomp_ <- new_generic( "decomp_", "config", function(config, x, verbosity = 1L) { S7_dispatch() } ) # /rtemis::decomp_ # %% cluster_ ---- #' Generic for clustering #' #' @author EDG #' @keywords internal #' @noRd cluster_ <- new_generic( "cluster_", "config", function(config, x, verbosity = 1L) { S7_dispatch() } ) # /rtemis::cluster_ # %% desc ---- #' Short description for inline printing. #' This is like `repr` for single-line descriptions. #' #' @author EDG #' @keywords internal #' @noRd desc <- new_generic("desc", "x") # %% get_metric ---- #' Get metric #' #' @author EDG #' @keywords internal #' @noRd get_metric <- new_generic("get_metric", "x") # %% validate_hyperparameters ---- #' Check hyperparameters given training data #' #' @param x tabular data: Training data. #' @param hyperparameters `Hyperparameters` to check. #' #' @author EDG #' @keywords internal #' @noRd validate_hyperparameters <- new_generic( "validate_hyperparameters", "x", function(x, hyperparameters) { S7_dispatch() } ) # /rtemis::validate_hyperparameters # %% plot_metric ---- #' Plot Metric #' #' @description #' Plot metric for `SupervisedRes` objects. #' #' @param x `SupervisedRes` object. #' @param ... Additional arguments passed to the plotting function. #' #' @return plotly object #' #' @author EDG #' @keywords internal #' @noRd plot_metric <- new_generic("plot_metric", "x") # %% plot_roc ---- #' Plot ROC curve #' #' @description #' This generic is used to plot the ROC curve for a model. #' #' @param x `Classification` or `ClassificationRes` object. #' @param ... Additional arguments passed to the plotting function. #' #' @return A plotly object containing the ROC curve. #' #' @author EDG #' @export #' #' @examples #' ir <- iris[51:150, ] #' ir[["Species"]] <- factor(ir[["Species"]]) #' species_glm <- train(ir, algorithm = "GLM") #' plot_roc(species_glm) plot_roc <- new_generic("plot_roc", "x") # %% plot_varimp ---- #' Plot Variable Importance #' #' @description #' Plot Variable Importance for Supervised objects. #' #' @param x `Supervised` or `SupervisedRes` object. #' @param ... Additional arguments passed to methods. #' #' @details #' This method calls [draw_varimp] internally. #' If you pass an integer to the `plot_top` argument, the method will plot this many top features. #' If you pass a number between 0 and 1 to the `plot_top` argument, the method will plot this #' fraction of top features. #' #' @return plotly object or invisible NULL if no variable importance is available. #' #' @author EDG #' @export #' #' @seealso [draw_varimp], which is called by this method #' #' @examplesIf interactive() #' ir <- set_outcome(iris, "Sepal.Length") #' seplen_cart <- train(ir, algorithm = "CART") #' plot_varimp(seplen_cart) #' # Plot horizontally #' plot_varimp(seplen_cart, orientation = "h") #' plot_varimp(seplen_cart, orientation = "h", plot_top = 3L) #' plot_varimp(seplen_cart, orientation = "h", plot_top = 0.5) plot_varimp <- new_generic("plot_varimp", "x") # %% plot_true_pred ---- #' Plot True vs. Predicted Values #' #' @description #' Plot True vs. Predicted Values for Supervised objects. #' For classification, it plots a confusion matrix. #' For regression, it plots a scatter plot of true vs. predicted values. #' #' @param x `Supervised` or `SupervisedRes` object. #' @param ... Additional arguments passed to methods. #' #' @return plotly object. #' #' @author EDG #' @export #' #' @examples #' x <- set_outcome(iris, "Sepal.Length") #' sepallength_glm <- train(x, algorithm = "GLM") #' plot_true_pred(sepallength_glm) plot_true_pred <- new_generic("plot_true_pred", "x") # %% plot_manhattan ---- #' Manhattan plot #' #' @description #' Draw a Manhattan plot for `MassGLM` objects created with [massGLM]. #' #' @param x `MassGLM` object. #' @param ... Additional arguments passed to methods. #' #' @return plotly object. #' #' @author EDG #' @export # example included in `plot_manhattan.MassGLM` method. plot_manhattan <- new_generic("plot_manhattan", "x") # %% describe ---- #' Describe object #' #' @param x R object to describe. See method documentation for supported classes. #' @param ... Additional arguments passed to methods. See details. #' #' @details #' Extra arguments for `factor` method: #' - `max_n`: Integer: Return counts for up to this many levels. #' - `return_ordered`: Logical: If TRUE, return levels ordered by count, otherwise return in level order. #' - `verbosity`: Integer: Verbosity level. #' #' @author EDG #' @export #' #' @examples #' # --- For `Supervised` objects --- #' species_lightrf <- train(iris, algorithm = "lightrf") #' describe(species_lightrf) #' #' # --- For `SupervisedRes` objects --- #' mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler()) #' describe(mod) #' #' # --- For factors --- #' # Small number of levels #' describe(iris[["Species"]]) #' #' # Large number of levels: show top n by count #' x <- factor(sample(letters, 1000, TRUE)) #' describe(x) #' describe(x, 3) #' describe(x, 3, return_ordered = FALSE) describe <- new_generic("describe", "x") # %% present ---- #' Present rtemis object #' #' @description #' This generic is used to present an rtemis object by printing to console and drawing plots. #' #' @param x `Supervised` or `SupervisedRes` object or list of such objects. #' @param ... Additional arguments passed to the plotting function. #' #' @return A plotly object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' ir <- set_outcome(iris, "Sepal.Length") #' seplen_lightrf <- train(ir, algorithm = "lightrf") #' present(seplen_lightrf) present <- new_generic("present", "x") # %% get_hyperparams_need_tuning ---- #' Get hyperparameters that need tuning. #' #' @return Character vector of hyperparameter names that need tuning. #' #' @author EDG #' @keywords internal #' @noRd get_hyperparams_need_tuning <- new_generic("get_hyperparams_need_tuning", "x") # %% get_hyperparams ---- #' Get hyperparameters. #' #' @author EDG #' @keywords internal #' @noRd get_hyperparams <- new_generic("get_hyperparams", c("x", "param_names")) # %% extract_rules ---- #' Extract rules from a model. #' #' @author EDG #' @keywords internal #' @noRd extract_rules <- new_generic("extract_rules", "x") # %% get_factor_levels ---- #' @name get_factor_levels #' #' @title #' Get factor levels from data.frame or similar #' #' @usage #' get_factor_levels(x) #' #' @param x tabular data. #' #' @return Named list of factor levels. Names correspond to column names. #' #' @author EDG #' @keywords internal #' @noRd get_factor_levels <- new_generic( "get_factor_levels", "x", function(x) S7_dispatch() ) method(get_factor_levels, class_data.frame) <- function(x) { factor_index <- which(sapply(x, is.factor)) lapply(x[, factor_index, drop = FALSE], levels) } method(get_factor_levels, class_data.table) <- function(x) { factor_index <- which(sapply(x, is.factor)) lapply(x[, factor_index, with = FALSE], levels) } # %% to_html ---- #' Convert to HTML #' #' @author EDG #' @keywords internal #' @noRd to_html <- new_generic("to_html", "x") # %% to_toml ---- #' Convert to TOML #' #' @author EDG #' @keywords internal #' @noRd to_toml <- new_generic("to_toml", "x") # %% to_yaml ---- #' Convert to YAML #' #' @author EDG #' @keywords internal #' @noRd to_yaml <- new_generic("to_yaml", "x") # %% to_json ---- #' Convert to JSON-serializable list #' #' Convert an rtemis S7 object to a named list suitable for #' `jsonlite::toJSON(auto_unbox = TRUE)`. Used by the rtemislive backend #' to send structured results to the browser frontend without scraping #' R console output. #' #' Each output list includes a `.class` field equal to the most specific #' S7 class name, allowing the frontend to dispatch to a class-specific #' renderer. #' #' The default method walks `props(x)`, recursing into S7-typed properties #' and passing through primitive properties as-is. Per-class methods #' override where the default isn't appropriate (e.g. classes whose props #' include a `data.table`, an opaque model fit, or where some props should #' be excluded for size or relevance reasons). #' #' @param x rtemis S7 object. #' @param ... Additional arguments passed to method. #' #' @return Named list. Pass through `jsonlite::toJSON(auto_unbox = TRUE)` #' for serialization. #' #' @author EDG #' @keywords internal #' @export to_json <- new_generic("to_json", "x") # %% to_json default ---- #' @name to_json #' @keywords internal #' @noRd method(to_json, S7_object) <- function(x, ...) { ps <- props(x) body <- lapply(ps, .to_json_value) c(list(.class = S7_class(x)@name), body) } # /rtemis::to_json.S7_object #' Recursively convert a value to a JSON-serializable form #' #' Handles the common composite shapes encountered when walking S7 props: #' nested S7 objects (recurse via the generic), lists that may *contain* #' S7 objects (recurse element-wise), and primitives / data.frames #' (pass through — jsonlite supports them natively). #' #' @param v Value from an S7 property. #' #' @return JSON-serializable value. #' #' @author EDG #' @keywords internal #' @noRd .to_json_value <- function(v) { if (is.null(v)) { return(NULL) } if (S7_inherits(v)) { return(to_json(v)) } # data.frame / data.table are list-like but jsonlite handles them natively. if (is.list(v) && !is.data.frame(v)) { return(lapply(v, .to_json_value)) } v } # /rtemis::.to_json_value # %% write_toml ---- #' @name #' write_toml #' #' @title #' Write to TOML file #' #' @author EDG #' @export # examples include in method documentation write_toml <- new_generic( "write_toml", "x", function(x, file, overwrite = FALSE, verbosity = 1L) { S7_dispatch() } ) # /rtemis::write_toml # %% inc ---- #' Select (include) columns by character or numeric vector. #' #' @param x tabular data. #' @param idx Character or numeric vector: Column names or indices to include. #' #' @return data.frame, tibble, or data.table. #' #' @author EDG #' @export #' #' @examples #' inc(iris, c(3, 4)) |> head() #' inc(iris, c("Sepal.Length", "Species")) |> head() inc <- new_generic("inc", "x", function(x, idx) { S7_dispatch() }) # %% exc ---- #' Exclude columns by character or numeric vector. #' #' @param x tabular data. #' @param idx Character or numeric vector: Column names or indices to exclude. #' #' @return data.frame, tibble, or data.table. #' #' @author EDG #' @export #' #' @examples #' exc(iris, "Species") |> head() #' exc(iris, c(1, 3)) |> head() exc <- new_generic("exc", c("x", "idx"), function(x, idx) { S7_dispatch() }) method(inc, class_data.frame) <- function(x, idx) { x[, idx, drop = FALSE] } method(inc, class_data.table) <- function(x, idx) { x[, .SD, .SDcols = idx] } method(exc, list(class_data.frame, class_character)) <- function(x, idx) { x[, -which(names(x) %in% idx), drop = FALSE] } method(exc, list(class_data.frame, class_integer)) <- function(x, idx) { x[, -idx, drop = FALSE] } method(exc, list(class_data.frame, class_double)) <- function(x, idx) { idx <- clean_int(idx) x[, -idx, drop = FALSE] } method( exc, list(class_data.table, class_character | class_integer) ) <- function(x, idx) { x[, .SD, .SDcols = -idx] } method(exc, list(class_data.table, class_double)) <- function(x, idx) { idx <- clean_int(idx) x[, .SD, .SDcols = -idx] } # %% outcome_name ---- #' Get the name of the last column #' #' @details #' This applied to tabular datasets used for supervised learning in rtemis, #' where, by convention, the last column is the outcome variable and all other columns #' are features. #' #' @param x tabular data. #' #' @return Name of the last column. #' #' @author EDG #' @export #' #' @examples #' outcome_name(iris) outcome_name <- new_generic("outcome_name", "x", function(x) { S7_dispatch() }) method(outcome_name, class_data.frame) <- function(x) { names(x)[NCOL(x)] } # /rtemis::outcome_name # %% outcome ---- #' Get the outcome as a vector #' #' Returns the last column of `x`, which is by convention the outcome variable. #' #' @details #' This applied to tabular datasets used for supervised learning in rtemis, #' where, by convention, the last column is the outcome variable and all other columns #' are features. #' #' @param x tabular data. #' #' @return Vector containing the last column of `x`. #' #' @author EDG #' @export #' #' @examples #' outcome(iris) outcome <- new_generic("outcome", "x", function(x) { S7_dispatch() }) # /rtemis::outcome method(outcome, class_data.frame) <- function(x) { x[[NCOL(x)]] } # %% features ---- #' Get features from tabular data #' #' Returns all columns except the last one. #' #' @details #' This can be applied to tabular datasets used for supervised learning in \pkg{rtemis}, #' where, by convention, the last column is the outcome variable and all other columns #' are features. #' #' @param x tabular data: Input data to get features from. #' #' @return Object of the same class as the input, after removing the last column. #' #' @author EDG #' @export #' #' @examples #' features(iris) |> head() features <- new_generic("features", "x", function(x) { S7_dispatch() }) # /rtemis::features method(features, class_data.frame) <- function(x) { if (NCOL(x) < 2) { cli::cli_abort("Input must have at least 2 columns.") } x[, -NCOL(x), drop = FALSE] } method(features, class_data.table) <- function(x) { if (NCOL(x) < 2) { cli::cli_abort("Input must have at least 2 columns.") } x[, -NCOL(x), with = FALSE] } # /rtemis::features.class_data.table # %% feature_names ---- #' Get feature names #' #' Returns all column names except the last one #' #' @details #' This applied to tabular datasets used for supervised learning in rtemis, #' where, by convention, the last column is the outcome variable and all other columns #' are features. #' #' @param x tabular data. #' #' @return Character vector of feature names. #' #' @author EDG #' @export #' #' @examples #' feature_names(iris) feature_names <- new_generic("feature_names", "x", function(x) { S7_dispatch() }) # /rtemis::feature_names method(feature_names, class_data.frame) <- function(x) { if (NCOL(x) < 2) { cli::cli_abort("Input must have at least 2 columns.") } names(x)[-NCOL(x)] } # /rtemis::feature_names.class_data.frame # %% check_factor_levels ---- #' Check factor levels #' #' @author EDG #' @keywords internal #' @noRd check_factor_levels <- new_generic("check_factor_levels", c("x")) # %% get_factor_names ---- #' Get factor names #' #' @details #' This applied to tabular datasets used for supervised learning in rtemis, #' where, by convention, the last column is the outcome variable and all other columns #' are features. #' #' @param x tabular data. #' #' @return Character vector of factor names. #' #' @author EDG #' @export #' #' @examples #' get_factor_names(iris) get_factor_names <- new_generic("get_factor_names", "x", function(x) { S7_dispatch() }) # /rtemis::get_factor_names method(get_factor_names, class_data.frame) <- function(x) { names(x)[sapply(x, is.factor)] } # %% calibrate ---- #' Calibrate `Classification` & `ClassificationRes` Models #' #' @description #' Generic function to calibrate binary classification models. #' #' @param x `Classification` or `ClassificationRes` object to calibrate. #' @param algorithm Character: Algorithm to use to train calibration model. #' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions. #' @param verbosity Integer: Verbosity level. #' @param ... Additional arguments passed to specific methods. #' #' @section Method-specific parameters: #' #' **For `Classification` objects:** #' * `predicted_probabilities`: Numeric vector of predicted probabilities #' * `true_labels`: Factor of true class labels #' #' **For `ClassificationRes` objects:** #' * `resampler_config`: `ResamplerConfig` object for calibration training #' * `train_verbosity`: Integer controlling calibration model training output #' #' @details #' The goal of calibration is to adjust the predicted probabilities of a binary classification #' model so that they better reflect the true probabilities (i.e. empirical risk) of the positive #' class. #' #' @return Calibrated model object. #' #' @author EDG #' @export #' #' @examples #' # --- Calibrate Classification --- #' dat <- iris[51:150, ] #' res <- resample(dat) #' dat$Species <- factor(dat$Species) #' dat_train <- dat[res[[1]], ] #' dat_test <- dat[-res[[1]], ] #' #' # Train GLM on a training/test split #' mod_c_glm <- train( #' x = dat_train, #' dat_test = dat_test, #' algorithm = "glm" #' ) #' #' # Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`, #' # in this case using the training data, but it could be a separate calibration dataset. #' mod_c_glm_cal <- calibrate( #' mod_c_glm, #' predicted_probabilities = mod_c_glm$predicted_prob_training, #' true_labels = mod_c_glm$y_training #' ) #' mod_c_glm_cal #' #' # --- Calibrate ClassificationRes --- #' #' # Train GLM with cross-validation #' resmod_c_glm <- train( #' x = dat, #' algorithm = "glm", #' outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") #' ) #' #' # Calibrate the `ClassificationRes` using the same resampling configuration as used for training. #' resmod_c_glm_cal <- calibrate(resmod_c_glm) #' resmod_c_glm_cal calibrate <- new_generic( "calibrate", ("x"), function( x, algorithm = "isotonic", hyperparameters = NULL, verbosity = 1L, ... ) { S7_dispatch() } ) # /rtemis::calibrate # %% freeze ---- #' Freeze Hyperparameters #' #' @param x `Hyperparameters` object. #' #' @author EDG #' @keywords internal #' @noRd freeze <- new_generic("freeze", "x") # %% lock ---- #' Lock Hyperparameters #' #' @param x `Hyperparameters` object. #' #' @author EDG #' @keywords internal #' @noRd lock <- new_generic("lock", "x") # %% needs_tuning ---- #' needs_tuning #' #' @keywords internal #' @noRd needs_tuning <- new_generic("needs_tuning", "x") # %% get_factor_levels ---- #' @name get_factor_levels #' #' @title #' Get factor levels from data.frame or similar #' #' @usage #' get_factor_levels(x) #' #' @param x tabular data. #' #' @return Named list of factor levels. Names correspond to column names. #' #' @author EDG #' @keywords internal #' @noRd get_factor_levels <- new_generic( "get_factor_levels", "x", function(x) S7_dispatch() ) method(get_factor_levels, class_data.frame) <- function(x) { factor_index <- which(sapply(x, is.factor)) lapply(x[, factor_index, drop = FALSE], levels) } method(get_factor_levels, class_data.table) <- function(x) { factor_index <- which(sapply(x, is.factor)) # with = FALSE slightly more performance than using .SD lapply(x[, factor_index, with = FALSE], levels) } # %% is_tuned ---- is_tuned <- new_generic("is_tuned", "x") # %% get_tuned_status ---- get_tuned_status <- new_generic("get_tuned_status", "x") # %% one_hot ---- one_hot <- new_generic("one_hot", "x") # --- Custom S7 validators ------------------------------------------------------------------------- # %% scalar_dbl ---- #' Scalar double #' #' @author EDG #' @keywords internal #' @noRd scalar_dbl <- S7::new_property( class = S7::class_double | NULL, validator = function(value) { if (!is.null(value)) { if (length(value) != 1) { "must be a scalar double." } else if (!is.double(value)) { "must be double." } } } ) # /rtemis::scalar_dbl # %% scalar_dbl_01excl ---- #' Scalar double between 0 and 1, exclusive #' #' @author EDG #' @keywords internal #' @noRd scalar_dbl_01excl <- S7::new_property( class = S7::class_double | NULL, validator = function(value) { if (!is.null(value)) { if (length(value) != 1) { "must be a scalar double." } else if (value <= 0 || value >= 1) { "must be between > 0 and < 1." } } } ) # /rtemis::scalar_dbl_01excl # %% scalar_dbl_01incl ---- #' Scalar double between 0 and 1, inclusive #' #' @author EDG #' @keywords internal #' @noRd scalar_dbl_01incl <- S7::new_property( class = S7::class_double | NULL, validator = function(value) { if (!is.null(value)) { if (length(value) != 1) { "must be a scalar double." } else if (value < 0 || value > 1) { "must be between >= 0 and <= 1." } } } ) # /rtemis::scalar_dbl_01incl # %% scalar_int ---- #' Scalar integer #' #' @author EDG #' @keywords internal #' @noRd scalar_int <- S7::new_property( class = S7::class_integer | NULL, validator = function(value) { if (!is.null(value)) { if (length(value) != 1) { "must be a scalar integer." } } } ) # /rtemis::scalar_int # %% scalar_int_pos ---- #' Scalar positive integer #' #' @author EDG #' @keywords internal #' @noRd scalar_int_pos <- S7::new_property( class = S7::class_integer | NULL, validator = function(value) { if (!is.null(value)) { if (length(value) != 1) { "must be a positive integer scalar." } else if (value < 0) { "must be >= 0." } } } ) # /rtemis::scalar_int_pos # %% preprocessed ---- #' Get preprocessed data from `Preprocessor`. #' #' Returns the preprocessed data from a `Preprocessor` object. #' #' @param x `Preprocessor`: A `Preprocessor` object. #' #' @return data.frame: The preprocessed data. #' #' @export #' #' @examples #' prp <- preprocess(iris, setup_Preprocessor(scale = TRUE, center = TRUE)) #' preprocessed(prp) preprocessed <- new_generic("preprocessed", "x", function(x) { S7_dispatch() }) # /rtemis::preprocessed # --- Internal functions --------------------------------------------------------------------------- #' Get output type #' #' Get output type for printing text. #' #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' @param filename Character: Filename for output. #' #' @return Character with selected output type. #' #' @author EDG #' #' @keywords internal #' @noRd get_output_type <- function( output_type = c("ansi", "html", "plain"), filename = NULL ) { if (!is.null(filename)) { return("plain") } if (is.null(output_type)) { if (interactive()) { return("ansi") } else { return("plain") } } match.arg(output_type) } # /rtemis::get_output_type # %% S7_to_list ---- S7_to_list <- function(x) { if (S7_inherits(x)) { x <- props(x) } if (is.list(x)) { x <- lapply(x, S7_to_list) } x } # /rtemis::S7_to_list # %% toml_empty_to_null ---- toml_empty_to_null <- function(x) { if (!is.list(x)) { return(x) } if (length(x) == 0L) { return(NULL) } if (is.null(names(x))) { scalar_types <- vapply( x, function(el) { is.atomic(el) && length(el) == 1L && !is.null(el) }, logical(1) ) if (all(scalar_types)) { return(unlist(x, use.names = FALSE)) } } lapply(x, toml_empty_to_null) } # /rtemis::toml_empty_to_null # %% write_lines ---- #' Write lines to file #' #' Normalizes path, check if directory exists, creates it if necessary, #' writes lines to file, and checks if file was created successfully. #' #' @param x Character: Text to write to file. #' @param file Character: Path to output file. #' @param verbosity Integer: Verbosity level. #' #' @return Invisible NULL. Called for side effect of writing to file. #' #' @author EDG #' @keywords internal #' @noRd write_lines <- function(x, file, overwrite = FALSE, verbosity = 1L) { # Normalize path file <- normalizePath(file, mustWork = FALSE) # Check if file exists if (file.exists(file)) { if (overwrite) { if (verbosity >= 1L) { msg(fmt( paste("Overwriting existing file:", file), col = rtemis_colors[["orange"]] )) } } else { cli::cli_abort( "File already exists: {file}. Set `overwrite = TRUE` to overwrite." ) } } # Get directory name dir <- dirname(file) # Check if directory exists, create it if not if (!dir.exists(dir)) { dir.create(dir, recursive = TRUE) if (!dir.exists(dir)) { cli::cli_abort("Failed to create directory: {dir}") } else { if (verbosity >= 1L) { msg(checkmark(), "Created directory:", dir) } } } # Write lines to file writeLines(x, con = file) # Check if file was created successfully if (!file.exists(file)) { cli::cli_abort("Failed to create file: {file}") } else { if (verbosity >= 1L) { msg(checkmark(), "Created file:", file) } } invisible(NULL) } # /rtemis::write_lines # %% toml_meta ---- #' @name #' toml_meta #' #' @title #' Write TOML metadata #' #' @description #' Creates named list which will become first TOML table in the following format: #' #' ```toml #' [_meta] #' package = "rtemis" #' package_version = "0.4.2" #' schema_version = "1.0" #' object_type = "SuperConfig" #' created_at = 2026-2-11T22:45:00Z #' ``` #' @param x Object to create metadata for. Class name will be included in metadata. #' @param schema_version Character: Version of the schema to include in metadata. #' #' @return Named list containing metadata. #' #' @author EDG #' @keywords internal #' @noRd toml_meta <- function(x, schema_version = "1.0") { list( `_meta` = list( package = "rtemis", package_version = as.character(packageVersion("rtemis")), schema_version = schema_version, object_type = S7_class(x)@name, created_at = format( Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC" ) ) ) } # /rtemis::toml_meta # %% toml_with_meta ---- #' Create TOML string with metadata #' #' Creates a TOML string with an inline metadata table followed by the TOML representation of the #' object. #' #' @param x Object to convert to TOML. Class name will be included in metadata. #' #' #' @return Character string containing TOML representation of the object, with metadata included as #' an inline table at the top. #' #' @author EDG #' @keywords internal #' @noRd toml_with_meta <- function(x, payload, schema_version = "1.0") { meta_block <- toml::write_toml( toml_meta(x, schema_version = schema_version) ) meta_lines <- strsplit(meta_block, "\n", fixed = TRUE)[[1]] meta_lines <- meta_lines[meta_lines != "" & meta_lines != "[_meta]"] meta_inline <- paste0( "_meta = { ", paste(meta_lines, collapse = ", "), " }" ) payload_str <- toml::write_toml(payload) paste(meta_inline, payload_str, sep = "\n\n") } # /rtemis::toml_with_meta ================================================ FILE: R/01_ExecutionConfig.R ================================================ # ExecutionConfig.R # ::rtemis:: # 2026- EDG rtemis.org # %% ExecutionConfig ---- #' ExecutionConfig Class #' #' @description #' Execution Configuration Class, defining sequential/parallel/distributed execution settings. #' #' @author EDG #' @noRd ExecutionConfig <- new_class( name = "ExecutionConfig", properties = list( backend = class_character, n_workers = class_integer, future_plan = class_character | NULL ), constructor = function(backend, n_workers, future_plan) { n_workers <- clean_int(n_workers) check_character(backend, allow_null = FALSE) check_character(future_plan, allow_null = TRUE) new_object( S7::S7_object(), backend = backend, n_workers = n_workers, future_plan = future_plan ) }, validator = function(self) { if (self@backend == "future" && is.null(self@future_plan)) { "@future_plan must be set when backend is 'future'." } else if (self@backend == "none" && self@n_workers != 1L) { "n_workers must be 1 when backend is 'none'." } else if (self@backend == "mirai" && self@n_workers < 1L) { "n_workers must be at least 1 when backend is 'mirai'." } else if (self@backend == "future" && self@n_workers < 1L) { "n_workers must be at least 1 when backend is 'future'." } } ) # /rtemis::ExecutionConfig # %% repr.ExecutionConfig ---- method(repr, ExecutionConfig) <- function(x, pad = 0L, output_type = NULL) { out <- repr_S7name("ExecutionConfig", pad = pad, output_type = output_type) .props <- props(x) if (.props[["backend"]] != "future") { .props[["future_plan"]] <- NULL } out <- paste0( out, repr_ls(.props, pad = pad, output_type = output_type) ) } # /rtemis::repr.ExecutionConfig # %% print.ExecutionConfig ---- method(print, ExecutionConfig) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type), "\n") invisible(x) } # /rtemis::print.ExecutionConfig # %% --- User API ---- # %% setup_ExecutionConfig ---- #' Setup Execution Configuration #' #' @param backend Character: Execution backend: "future", "mirai", or "none". #' @param n_workers Integer: Number of workers for parallel execution. Only used if `backend is #' "future"` or "mirai". Do not rely on the default value, set to an appropriate number depending #' on your system. #' @param future_plan Character: Future plan to use if `backend` is "future". #' #' @return `ExecutionConfig` object. #' #' @author EDG #' @export #' #' @examples #' setup_ExecutionConfig(backend = "future", n_workers = 4L, future_plan = "multisession") setup_ExecutionConfig <- function( backend = c("future", "mirai", "none"), n_workers = NULL, future_plan = NULL ) { backend <- match.arg(backend) if (backend == "future") { check_dependencies("futurize") check_character(future_plan, allow_null = TRUE) if (is.null(future_plan)) { future_plan <- getOption("future.plan", "mirai_multisession") } if (!future_plan %in% ALLOWED_PLANS) { cli::cli_abort( "{.val {future_plan}} is not an allowed future plan. Allowed plans: {.val {ALLOWED_PLANS}}." ) } if (is.null(n_workers)) { n_workers <- parallelly::availableCores(omit = 3L) } } else if (backend == "mirai") { check_dependencies("mirai") if (is.null(n_workers)) { n_workers <- parallelly::availableCores(omit = 3L) } } else if (backend == "none") { if (is.null(n_workers)) { n_workers <- 1L } else if (n_workers != 1L) { cli::cli_abort("n_workers must be 1 when backend is 'none'.") } } n_workers <- clean_int(n_workers) if (n_workers < 1L) { cli::cli_abort("n_workers must be at least 1.") } ExecutionConfig( backend = backend, n_workers = n_workers, future_plan = if (backend == "future") future_plan else NULL ) } # /rtemis::setup_ExecutionConfig ================================================ FILE: R/02_Hyperparameters.R ================================================ # S7_Hyperparameters.R # ::rtemis:: # 2025- EDG rtemis.org # References ---- # S7 # - https://github.com/RConsortium/S7 # - https://rconsortium.github.io/S7/ # LightGBM parameters # - https://lightgbm.readthedocs.io/en/latest/Parameters.html # %% Constants ---- # `tuned` values ---- # -9: Set by Tuner: Actively being tuned (Values fixed by Tuner). # -2: Set by constructor: Not tunable (No tunable_hyperparameters). # -1: Set by constructor: Not tunable (tunable_hyperparameters exist, but none of them have more than one value). # 0: Set by constructor: Untuned but tunable (at least one of tunable_hyperparameters has more than one value). # 1: Set by Tuner: Tuned (Started as 0, set to 1 when tuned). TUNED_STATUS_TUNING <- -9L TUNED_STATUS_NOT_TUNABLE <- -2L TUNED_STATUS_NO_SEARCH_VALUES <- -1L TUNED_STATUS_UNTUNED <- 0L TUNED_STATUS_TUNED <- 1L # `resampled` values ---- # 0: Running on single training set. # 1: Running on resampled training sets. # %% Hyperparameters ---- #' @title Hyperparameters #' #' @description #' Superclass for hyperparameters. #' #' @field algorithm Character: Algorithm name. #' @field hyperparameters Named list of algorithm hyperparameter values. #' @field tunable_hyperparameters Character: Names of tunable hyperparameters. #' @field fixed_hyperparameters Character: Names of fixed hyperparameters. #' @field tuned Integer: Tuning status. #' @field resampled Integer: Outer resampling status. #' @field n_workers Integer: Number of workers to use for tuning. #' #' @author EDG #' @noRd Hyperparameters <- new_class( name = "Hyperparameters", properties = list( algorithm = class_character, hyperparameters = class_list, tunable_hyperparameters = class_character, fixed_hyperparameters = class_character, tuned = class_integer, resampled = class_integer, n_workers = class_integer ), constructor = function( algorithm, hyperparameters, tunable_hyperparameters, fixed_hyperparameters, n_workers = 1L ) { # Test if any tunable_hyperparameters have more than one value if (length(tunable_hyperparameters) > 0) { if (any(sapply(hyperparameters[tunable_hyperparameters], length) > 1)) { tuned <- 0L # Search values defined for tunable hyperparameters. } else { tuned <- -1L # No search values defined for tunable hyperparameters. } } else { tuned <- -2L # No tunable hyperparameters } # GLMNET if (algorithm == "GLMNET") { if (is.null(hyperparameters[["lambda"]])) { tuned <- 0L } } # LightGBM if (algorithm == "LightGBM") { if (is.null(hyperparameters[["nrounds"]])) { tuned <- 0L } } # SVM # Check kernel-specific hyperparameters if (algorithm == "SVM") { # linear => cost if (hyperparameters[["kernel"]] == "linear") { if (length(hyperparameters[["cost"]]) > 1) { tuned <- 0L } } else if (hyperparameters[["kernel"]] == "polynomial") { if (length(hyperparameters[["degree"]]) > 1) { tuned <- 0L } } else if (hyperparameters[["kernel"]] == "radial") { if (length(hyperparameters[["sigma"]]) > 1) { tuned <- 0L } } } n_workers <- clean_posint(n_workers) new_object( S7_object(), algorithm = algorithm, hyperparameters = hyperparameters, tunable_hyperparameters = tunable_hyperparameters, fixed_hyperparameters = fixed_hyperparameters, tuned = tuned, resampled = 0L, n_workers = n_workers ) } ) # /rtemis::Hyperparameters # %% repr.Hyperparameters ---- #' Repr Hyperparameters #' #' repr method for Hyperparameters object. #' #' @param x `Hyperparameters` object. #' @param pad Integer: Left padding for printed output. #' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis. `-1` means no limit. #' @param limit Integer: Limit number of items to show. `-1` means no limit. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @author EDG #' @noRd method(repr, Hyperparameters) <- function( x, pad = 0L, maxlength = -1L, limit = -1L, output_type = NULL ) { output_type <- get_output_type(output_type) out <- repr_S7name( paste0(x@algorithm, "Hyperparameters"), pad = pad, output_type = output_type ) out <- paste0( out, repr_ls( props(x)[-1], pad = pad, maxlength = maxlength, limit = limit, output_type = output_type ) ) if (x@tuned == TUNED_STATUS_TUNING) { out <- paste0( out, fmt( "\n Hyperparameters are being tuned.\n", col = col_tuner, bold = TRUE, output_type = output_type ) ) } else if (x@tuned == TUNED_STATUS_NOT_TUNABLE) { out <- paste0( out, fmt( "\n No hyperparameters are tunable.\n", col = col_tuner, bold = TRUE, output_type = output_type ) ) } else if (x@tuned == TUNED_STATUS_UNTUNED) { need_tuning <- names(get_hyperparams_need_tuning(x)) out <- paste0( out, fmt( paste0( "\n ", ngettext(length(need_tuning), "Hyperparameter ", "Hyperparameters "), oxfordcomma( need_tuning ), ngettext(length(need_tuning), " needs ", " need "), "tuning.\n" ), col = col_tuner, bold = TRUE, output_type = output_type ) ) } else if (x@tuned == TUNED_STATUS_NO_SEARCH_VALUES) { out <- paste0( out, fmt( "\n No search values defined for tunable hyperparameters.\n", col = col_tuner, bold = TRUE, output_type = output_type ) ) } else if (x@tuned == TUNED_STATUS_TUNED) { out <- paste0( out, fmt( "\n Hyperparameters are tuned.\n", col = col_tuner, bold = TRUE, output_type = output_type ) ) } out } # /rtemis::repr.Hyperparameters # %% print.Hyperparameters ---- method(print, Hyperparameters) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.Hyperparameters # %% is_tuned.Hyperparameters ---- method(is_tuned, Hyperparameters) <- function(x) { x@tuned == 1L } # /is_tuned.Hyperparameters # %% get_tuned_status.Hyperparameters ---- method(get_tuned_status, Hyperparameters) <- function(x) { if (length(x@tunable_hyperparameters) > 0) { if (any(sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1)) { 0L } else { -1L } } else { -2L } } # /rtemis::get_tuned_status.Hyperparameters # %% update.Hyperparameters ---- #' Update Hyperparameters #' #' @param x `Hyperparameters` object. #' @param hyperparameters Named list of algorithm hyperparameter values. #' #' @author EDG #' @keywords internal #' @noRd method(update, Hyperparameters) <- function( object, hyperparameters, tuned = NULL, ... ) { for (hp in names(hyperparameters)) { object@hyperparameters[[hp]] <- hyperparameters[[hp]] } # Update tuned status if (is.null(tuned)) { object@tuned <- get_tuned_status(object) } else { object@tuned <- tuned } object } # /rtemis::update.Hyperparameters # %% freeze.Hyperparameters ---- method(freeze, Hyperparameters) <- function(x) { x@tuned <- -1L } # /rtemis::freeze.Hyperparameters # %% lock.Hyperparameters ---- method(lock, Hyperparameters) <- function(x) { x@tuned <- 1L } # %% `$`.Hyperparameters ---- # Make Hyperparameters@hyperparameters@name `$`-accessible method(`$`, Hyperparameters) <- function(x, name) { x@hyperparameters[[name]] } # %% `.DollarNames`.Hyperparameters ---- # `$`-autocomplete Hyperparameters@hyperparameters method(`.DollarNames`, Hyperparameters) <- function(x, pattern = "") { all_names <- names(x@hyperparameters) grep(pattern, all_names, value = TRUE) } # %% `[[`.Hyperparameters ---- # Make Hyperparameters@hyperparameters@name `[[`-accessible method(`[[`, Hyperparameters) <- function(x, name) { x@hyperparameters[[name]] } # %% needs_tuning.Hyperparameters ---- method(needs_tuning, Hyperparameters) <- function(x) { x@tuned == 0 } # /rtemis::needs_tuning.Hyperparameters # %% get_hyperparams_need_tuning.Hyperparameters ---- #' Get hyperparameters that need tuning in an algorithm-specific way. #' #' @keywords internal #' @noRd method(get_hyperparams_need_tuning, Hyperparameters) <- function(x) { # -> list # Get tunable hyperparameters with more than one value x@hyperparameters[x@tunable_hyperparameters[ sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1 ]] } # /get_hyperparams_need_tuning.Hyperparameters # %% get_hyperparams.(Hyperparameters, class_character) ---- method(get_hyperparams, list(Hyperparameters, class_character)) <- function( x, param_names ) { sapply(param_names, function(p) x@hyperparameters[p], USE.NAMES = FALSE) } # /rtemis::get_hyperparams_need_tuning.Hyperparameters # %% GLMHyperparameters ---- #' @author EDG #' #' @keywords internal #' @noRd GLMHyperparameters <- new_class( name = "GLMHyperparameters", parent = Hyperparameters, constructor = function(ifw) { new_object( Hyperparameters( algorithm = "GLM", hyperparameters = list( ifw = ifw ), tunable_hyperparameters = "ifw", fixed_hyperparameters = character() ) ) } # /constructor ) # /rtemis::GLMHyperparameters # %% setup_GLM ---- #' Setup GLM Hyperparameters #' #' Setup hyperparameters for GLM training. #' #' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return GLMHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' glm_hyperparams <- setup_GLM(ifw = TRUE) #' glm_hyperparams setup_GLM <- function(ifw = FALSE) { GLMHyperparameters(ifw = ifw) } # %% GAMHyperparameters ---- GAM_tunable <- c("k", "ifw") GAM_fixed <- character() #' @author EDG #' @keywords internal #' @noRd GAMHyperparameters <- new_class( name = "GAMHyperparameters", parent = Hyperparameters, constructor = function(k, ifw) { new_object( Hyperparameters( algorithm = "GAM", hyperparameters = list( k = k, ifw = ifw ), tunable_hyperparameters = GAM_tunable, fixed_hyperparameters = GAM_fixed ) ) } # /constructor ) # /rtemis::GAMHyperparameters # %% setup_GAM ---- #' Setup GAM Hyperparameters #' #' Setup hyperparameters for GAM training. #' #' Get more information from [mgcv::gam]. #' #' @param k (Tunable) Integer: Number of knots. #' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return GAMHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' gam_hyperparams <- setup_GAM(k = 5L, ifw = FALSE) #' gam_hyperparams setup_GAM <- function(k = 5L, ifw = FALSE) { k <- clean_posint(k) GAMHyperparameters(k = k, ifw = ifw) } # %% CARTHyperparameters ---- CART_tunable <- c("cp", "maxdepth", "minsplit", "minbucket", "prune_cp", "ifw") CART_fixed <- c( "method", "model", "maxcompete", "maxsurrogate", "usesurrogate", "surrogatestyle", "xval", "cost" ) #' @title CARTHyperparameters #' #' @description #' Hyperparameters subclass for CART. #' #' @author EDG #' @keywords internal #' @noRd CARTHyperparameters <- new_class( name = "CARTHyperparameters", parent = Hyperparameters, constructor = function( cp, maxdepth, minsplit, minbucket, prune_cp, method, model, maxcompete, maxsurrogate, usesurrogate, surrogatestyle, xval, cost, ifw ) { new_object( Hyperparameters( algorithm = "CART", hyperparameters = list( cp = cp, maxdepth = maxdepth, minsplit = minsplit, minbucket = minbucket, prune_cp = prune_cp, method = method, model = model, maxcompete = maxcompete, maxsurrogate = maxsurrogate, usesurrogate = usesurrogate, surrogatestyle = surrogatestyle, xval = xval, cost = cost, ifw = ifw ), tunable_hyperparameters = CART_tunable, fixed_hyperparameters = CART_fixed ) ) } # /constructor ) # /rtemis::CARTHyperparameters # %% setup_CART ---- #' Setup CART Hyperparameters #' #' Setup hyperparameters for CART training. #' #' Get more information from [rpart::rpart] and [rpart::rpart.control]. #' #' @param cp (Tunable) Numeric: Complexity parameter. #' @param maxdepth (Tunable) Integer: Maximum depth of tree. #' @param minsplit (Tunable) Integer: Minimum number of observations in a node to split. #' @param minbucket (Tunable) Integer: Minimum number of observations in a terminal node. #' @param prune_cp (Tunable) Numeric: Complexity for cost-complexity pruning after tree is built #' @param method String: Splitting method. #' @param model Logical: If TRUE, return a model. #' @param maxcompete Integer: Maximum number of competitive splits. #' @param maxsurrogate Integer: Maximum number of surrogate splits. #' @param usesurrogate Integer: Number of surrogate splits to use. #' @param surrogatestyle Integer: Type of surrogate splits. #' @param xval Integer: Number of cross-validation folds. #' @param cost Numeric (>=0): One for each feature. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return CARTHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' cart_hyperparams <- setup_CART(cp = 0.01, maxdepth = 10L, ifw = TRUE) #' cart_hyperparams setup_CART <- function( # tunable cp = 0.01, maxdepth = 20L, minsplit = 2L, minbucket = 1L, # round(minsplit / 3), prune_cp = NULL, # fixed method = "auto", model = TRUE, maxcompete = 4L, maxsurrogate = 5L, usesurrogate = 2L, surrogatestyle = 0L, xval = 0L, cost = NULL, ifw = FALSE ) { check_inherits(cp, "numeric") maxdepth <- clean_int(maxdepth) minsplit <- clean_int(minsplit) minbucket <- clean_int(minbucket) check_inherits(prune_cp, "numeric") check_inherits(method, "character") check_inherits(model, "logical") maxcompete <- clean_int(maxcompete) maxsurrogate <- clean_int(maxsurrogate) usesurrogate <- clean_int(usesurrogate) surrogatestyle <- clean_int(surrogatestyle) xval <- clean_int(xval) check_inherits(cost, "numeric") CARTHyperparameters( cp = cp, maxdepth = maxdepth, minsplit = minsplit, minbucket = minbucket, prune_cp = prune_cp, method = method, model = model, maxcompete = maxcompete, maxsurrogate = maxsurrogate, usesurrogate = usesurrogate, surrogatestyle = surrogatestyle, xval = xval, cost = cost, ifw = ifw ) } # /rtemis::setup_CART # Test that all CART hyperparameters are set by setup_CART stopifnot(all(c(CART_tunable, CART_fixed) %in% names(formals(setup_CART)))) # %% GLMNETHyperparameters ---- GLMNET_tunable <- c("alpha", "ifw") GLMNET_fixed <- c( "family", "offset", "which_lambda_cv", "nlambda", "penalty_factor", "standardize", "intercept" ) #' @title GLMNETHyperparameters #' #' @description #' Hyperparameters subclass for GLMNET. #' #' @author EDG #' @keywords internal #' @noRd GLMNETHyperparameters <- new_class( name = "GLMNETHyperparameters", parent = Hyperparameters, constructor = function( alpha, family, offset, which_lambda_cv, nlambda, lambda, penalty_factor, standardize, intercept, ifw ) { check_float01inc(alpha) check_inherits(which_lambda_cv, "character") nlambda <- clean_posint(nlambda) check_inherits(penalty_factor, "numeric") check_inherits(standardize, "logical") new_object( Hyperparameters( algorithm = "GLMNET", hyperparameters = list( alpha = alpha, family = family, offset = offset, which_lambda_cv = which_lambda_cv, nlambda = nlambda, lambda = lambda, penalty_factor = penalty_factor, standardize = standardize, intercept = intercept, ifw = ifw ), tunable_hyperparameters = GLMNET_tunable, fixed_hyperparameters = GLMNET_fixed ) ) } # /constructor ) # /rtemis::GLMNETHyperparameters #' Setup GLMNET Hyperparameters #' #' Setup hyperparameters for GLMNET training. #' #' Get more information from [glmnet::glmnet]. #' #' @param alpha (Tunable) Numeric: Mixing parameter. #' @param family Character: Family for GLMNET. #' @param offset Numeric: Offset for GLMNET. #' @param which_lambda_cv Character: Which lambda to use for prediction: #' "lambda.1se" or "lambda.min" #' @param nlambda Positive integer: Number of lambda values. #' @param lambda Numeric: Lambda values. #' @param penalty_factor Numeric: Penalty factor for each feature. #' @param standardize Logical: If TRUE, standardize features. #' @param intercept Logical: If TRUE, include intercept. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return GLMNETHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' glm_hyperparams <- setup_GLMNET(alpha = 1, ifw = TRUE) #' glm_hyperparams setup_GLMNET <- function( # tunable alpha = 1, # fixed family = NULL, offset = NULL, which_lambda_cv = "lambda.1se", nlambda = 100L, lambda = NULL, penalty_factor = NULL, standardize = TRUE, intercept = TRUE, ifw = TRUE ) { check_float01inc(alpha) check_inherits(which_lambda_cv, "character") nlambda <- clean_posint(nlambda) check_inherits(penalty_factor, "numeric") check_logical(standardize) check_logical(ifw) GLMNETHyperparameters( family = family, offset = offset, alpha = alpha, which_lambda_cv = which_lambda_cv, nlambda = nlambda, lambda = lambda, penalty_factor = penalty_factor, standardize = standardize, intercept = intercept, ifw = ifw ) } # /rtemis::setup_GLMNET # Test that all GLMNET hyperparameters are set by setup_GLMNET stopifnot(all( c(GLMNET_tunable, GLMNET_fixed) %in% names(formals(setup_GLMNET)) )) method(get_hyperparams_need_tuning, GLMNETHyperparameters) <- function(x) { # Get tunable hyperparameters with more than one value out <- x@hyperparameters[x@tunable_hyperparameters[ sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1 ]] if (is.null(x[["lambda"]])) { out <- c(out, list(lambda = NULL)) } out } # /rtemis::get_hyperparams_need_tuning.GLMNETHyperparameters # %% LightCARTHyperparameters ---- LightCART_tunable <- c( "num_leaves", "max_depth", "lambda_l1", "lambda_l2", "min_data_in_leaf", "max_cat_threshold", "min_data_per_group", "linear_tree", "ifw" ) LightCART_fixed <- c("objective") #' @title LightCARTHyperparameters #' #' @description #' Hyperparameters subclass for LightCART #' #' @author EDG #' @keywords internal #' @noRd LightCARTHyperparameters <- new_class( name = "LightCARTHyperparameters", parent = Hyperparameters, constructor = function( num_leaves, max_depth, lambda_l1, lambda_l2, min_data_in_leaf, max_cat_threshold, min_data_per_group, linear_tree, objective, ifw ) { new_object( Hyperparameters( algorithm = "LightCART", hyperparameters = list( num_leaves = num_leaves, max_depth = max_depth, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, min_data_in_leaf = min_data_in_leaf, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, objective = objective, ifw = ifw ), tunable_hyperparameters = LightCART_tunable, fixed_hyperparameters = LightCART_fixed ) ) } # /constructor ) # /rtemis::LightCARTHyperparameters # %% setup_LightCART ---- #' Setup LightCART Hyperparameters #' #' Setup hyperparameters for LightCART training. #' #' Get more information from [lightgbm::lgb.train]. #' #' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree. #' @param max_depth (Tunable) Integer: Maximum depth of trees. #' @param lambda_l1 (Tunable) Numeric: L1 regularization. #' @param lambda_l2 (Tunable) Numeric: L2 regularization. #' @param min_data_in_leaf (Tunable) Positive integer: Minimum number of data in a leaf. #' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features. #' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group. #' @param linear_tree (Tunable) Logical: If TRUE, use linear trees. #' @param objective Character: Objective function. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return LightCARTHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' lightcart_hyperparams <- setup_LightCART(num_leaves = 32L, ifw = FALSE) #' lightcart_hyperparams setup_LightCART <- function( num_leaves = 32L, max_depth = -1L, lambda_l1 = 0, lambda_l2 = 0, min_data_in_leaf = 20L, max_cat_threshold = 32L, min_data_per_group = 100L, linear_tree = FALSE, objective = NULL, ifw = FALSE ) { num_leaves <- clean_posint(num_leaves) max_depth <- clean_int(max_depth) check_float0pos(lambda_l1) check_float0pos(lambda_l2) min_data_in_leaf <- clean_posint(min_data_in_leaf) max_cat_threshold <- clean_posint(max_cat_threshold) min_data_per_group <- clean_posint(min_data_per_group) check_logical(linear_tree) LightCARTHyperparameters( num_leaves = num_leaves, max_depth = max_depth, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, min_data_in_leaf = min_data_in_leaf, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, objective = objective, ifw = ifw ) } # /rtemis::setup_LightCART # %% LightRFHyperparameters ---- LightRF_tunable <- c( "nrounds", "num_leaves", "max_depth", "feature_fraction", "subsample", "lambda_l1", "lambda_l2", "max_cat_threshold", "min_data_per_group", "ifw" ) LightRF_fixed <- c( "objective", "device_type", "tree_learner", "boosting_type", "learning_rate", "subsample_freq", "early_stopping_rounds", "force_col_wise" ) #' @title LightRFHyperparameters #' #' @description #' Hyperparameters subclass for LightRF #' #' @author EDG #' @keywords internal #' @noRd LightRFHyperparameters <- new_class( name = "LightRFHyperparameters", parent = Hyperparameters, constructor = function( nrounds, num_leaves, max_depth, feature_fraction, subsample, lambda_l1, lambda_l2, max_cat_threshold, min_data_per_group, linear_tree, ifw, # fixed objective, device_type, tree_learner, force_col_wise ) { new_object( Hyperparameters( algorithm = "LightRF", hyperparameters = list( nrounds = nrounds, num_leaves = num_leaves, max_depth = max_depth, feature_fraction = feature_fraction, subsample = subsample, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, ifw = ifw, # fixed objective = objective, device_type = device_type, tree_learner = tree_learner, force_col_wise = force_col_wise, # unsettable: LightGBM params for RF boosting_type = "rf", learning_rate = 1, # no effect? in boosting_type 'rf', but set for clarity subsample_freq = 1L, # a.k.a. bagging_freq early_stopping_rounds = -1L ), tunable_hyperparameters = LightRF_tunable, fixed_hyperparameters = LightRF_fixed ) ) } ) # /rtemis::LightRFHyperparameters # %% setup_LightRF ---- #' Setup LightRF Hyperparameters #' #' Setup hyperparameters for LightRF training. #' #' Get more information from [lightgbm::lgb.train]. #' Note that hyperparameters subsample_freq and early_stopping_rounds are fixed, #' and cannot be set because they are what makes `lightgbm` train a random forest. #' These can all be set when training gradient boosting with LightGBM. #' #' @param nrounds (Tunable) Positive integer: Number of boosting rounds. #' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree. #' @param max_depth (Tunable) Integer: Maximum depth of trees. #' @param feature_fraction (Tunable) Numeric: Fraction of features to use. #' @param subsample (Tunable) Numeric: Fraction of data to use. #' @param lambda_l1 (Tunable) Numeric: L1 regularization. #' @param lambda_l2 (Tunable) Numeric: L2 regularization. #' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features. #' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group. #' @param linear_tree Logical: If TRUE, use linear trees. #' @param objective Character: Objective function. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' @param device_type Character: "cpu" or "gpu". #' @param tree_learner Character: "serial", "feature", "data", or "voting". #' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building. #' #' @return LightRFHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' lightrf_hyperparams <- setup_LightRF(nrounds = 1000L, ifw = FALSE) #' lightrf_hyperparams setup_LightRF <- function( nrounds = 500L, num_leaves = 4096L, max_depth = -1L, feature_fraction = 0.7, subsample = .623, # a.k.a. bagging_fraction lambda_l1 = 0, lambda_l2 = 0, max_cat_threshold = 32L, min_data_per_group = 32L, linear_tree = FALSE, ifw = FALSE, # fixed objective = NULL, device_type = "cpu", tree_learner = "serial", force_col_wise = TRUE ) { nrounds <- clean_posint(nrounds) num_leaves <- clean_posint(num_leaves) max_depth <- clean_int(max_depth) check_float01inc(feature_fraction) check_float01inc(subsample) check_float0pos(lambda_l1) check_float0pos(lambda_l2) max_cat_threshold <- clean_posint(max_cat_threshold) min_data_per_group <- clean_posint(min_data_per_group) check_logical(linear_tree) LightRFHyperparameters( nrounds = nrounds, num_leaves = num_leaves, max_depth = max_depth, feature_fraction = feature_fraction, subsample = subsample, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, ifw = ifw, objective = objective, device_type = device_type, tree_learner = tree_learner, force_col_wise = force_col_wise ) } # /rtemis::setupLightRF # Test that all LightRF hyperparameters are set by setup_LightRF # LightRF fixed hyperparameters are not editable. stopifnot(all(LightRF_tunable %in% names(formals(setup_LightRF)))) # %% LightGBMHyperparameters ---- LightGBM_tunable <- c( "num_leaves", "max_depth", "learning_rate", "feature_fraction", "subsample", "subsample_freq", "lambda_l1", "lambda_l2", "max_cat_threshold", "min_data_per_group", "linear_tree", "ifw" ) LightGBM_fixed <- c( "max_nrounds", "force_nrounds", "early_stopping_rounds", "objective", "device_type", "tree_learner", "force_col_wise" ) #' @title LightGBMHyperparameters #' #' @description #' Hyperparameters subclass for LightGBM #' #' @author EDG #' @keywords internal #' @noRd LightGBMHyperparameters <- new_class( name = "LightGBMHyperparameters", parent = Hyperparameters, constructor = function( max_nrounds, force_nrounds, early_stopping_rounds, # tunable num_leaves, max_depth, learning_rate, feature_fraction, subsample, subsample_freq, lambda_l1, lambda_l2, max_cat_threshold, min_data_per_group, linear_tree, ifw, objective, device_type, tree_learner, force_col_wise ) { nrounds <- if (!is.null(force_nrounds)) { force_nrounds } else { NULL } new_object( Hyperparameters( algorithm = "LightGBM", hyperparameters = list( nrounds = nrounds, max_nrounds = max_nrounds, force_nrounds = force_nrounds, early_stopping_rounds = early_stopping_rounds, num_leaves = num_leaves, max_depth = max_depth, learning_rate = learning_rate, feature_fraction = feature_fraction, subsample = subsample, subsample_freq = subsample_freq, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, ifw = ifw, objective = objective, device_type = device_type, tree_learner = tree_learner, force_col_wise = force_col_wise ), tunable_hyperparameters = LightGBM_tunable, fixed_hyperparameters = LightGBM_fixed ) ) } ) # /rtemis::LightGBMHyperparameters method(update, LightGBMHyperparameters) <- function( object, hyperparameters, tuned = NULL, ... ) { for (hp in names(hyperparameters)) { object@hyperparameters[[hp]] <- hyperparameters[[hp]] } # Update tuned status if (is.null(tuned)) { object@tuned <- get_tuned_status(object) } else { object@tuned <- tuned } # Update nrounds (e.g. in LightRuleFit) if ( is.null(object@hyperparameters[["nrounds"]]) && !is.null(object@hyperparameters[["force_nrounds"]]) ) { object@hyperparameters[["nrounds"]] <- object@hyperparameters[[ "force_nrounds" ]] } object } # /update.LightGBMHyperparameters # %% setup_LightGBM ---- # References: # LightGBM parameters: https://lightgbm.readthedocs.io/en/latest/Parameters.html #' Setup LightGBM Hyperparameters #' #' Setup hyperparameters for LightGBM training. #' #' Get more information from [lightgbm::lgb.train]. #' #' @param max_nrounds Positive integer: Maximum number of boosting rounds. #' @param force_nrounds Positive integer: Use this many boosting rounds. Disable search for nrounds. #' @param early_stopping_rounds Positive integer: Number of rounds without improvement to stop training. #' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree. #' @param max_depth (Tunable) Integer: Maximum depth of trees. #' @param learning_rate (Tunable) Numeric: Learning rate. #' @param feature_fraction (Tunable) Numeric: Fraction of features to use. #' @param subsample (Tunable) Numeric: Fraction of data to use. #' @param subsample_freq (Tunable) Positive integer: Frequency of subsample. #' @param lambda_l1 (Tunable) Numeric: L1 regularization. #' @param lambda_l2 (Tunable) Numeric: L2 regularization. #' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features. #' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group. #' @param linear_tree Logical: If TRUE, use linear trees. #' @param objective Character: Objective function. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' @param device_type Character: "cpu" or "gpu". #' @param tree_learner Character: "serial", "feature", "data", or "voting". #' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building. #' #' @return LightGBMHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' lightgbm_hyperparams <- setup_LightGBM( #' max_nrounds = 500L, #' learning_rate = c(0.001, 0.01, 0.05), ifw = TRUE #' ) #' lightgbm_hyperparams setup_LightGBM <- function( # nrounds will be auto-tuned if force_nrounds is NULL with a value up to max_nrounds and # using early_stopping_rounds. max_nrounds = 1000L, force_nrounds = NULL, early_stopping_rounds = 10L, # tunable num_leaves = 8L, max_depth = -1L, learning_rate = 0.01, feature_fraction = 1.0, subsample = 1.0, # a.k.a. bagging_fraction {check:hyper} subsample_freq = 1L, lambda_l1 = 0, lambda_l2 = 0, max_cat_threshold = 32L, min_data_per_group = 32L, linear_tree = FALSE, ifw = FALSE, objective = NULL, device_type = "cpu", tree_learner = "serial", force_col_wise = TRUE ) { max_nrounds <- clean_posint(max_nrounds) force_nrounds <- clean_posint(force_nrounds) early_stopping_rounds <- clean_posint(early_stopping_rounds) num_leaves <- clean_posint(num_leaves) max_depth <- clean_int(max_depth) check_floatpos1(learning_rate) check_floatpos1(feature_fraction) check_floatpos1(subsample) subsample_freq <- clean_posint(subsample_freq) check_float0pos(lambda_l1) check_float0pos(lambda_l2) max_cat_threshold <- clean_posint(max_cat_threshold) min_data_per_group <- clean_posint(min_data_per_group) check_logical(linear_tree) LightGBMHyperparameters( max_nrounds = max_nrounds, force_nrounds = force_nrounds, early_stopping_rounds = early_stopping_rounds, num_leaves = num_leaves, max_depth = max_depth, learning_rate = learning_rate, feature_fraction = feature_fraction, subsample = subsample, subsample_freq = subsample_freq, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, max_cat_threshold = max_cat_threshold, min_data_per_group = min_data_per_group, linear_tree = linear_tree, ifw = ifw, objective = objective, device_type = device_type, tree_learner = tree_learner, force_col_wise = force_col_wise ) } # /rtemis::setupLightGBM # Test that all LightGBM hyperparameters are set by setup_LightGBM stopifnot(all( c(LightGBM_tunable, LightGBM_fixed) %in% names(formals(setup_LightGBM)) )) method(get_hyperparams_need_tuning, LightGBMHyperparameters) <- function(x) { # Get tunable hyperparameters with more than one value out <- x@hyperparameters[x@tunable_hyperparameters[ sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1 ]] if (is.null(x[["nrounds"]])) { out <- c(out, list(nrounds = NULL)) } out } # /get_hyperparams_need_tuning.LightGBMHyperparameters # %% LightRuleFitHyperparameters ---- LightRuleFit_tunable <- c( "nrounds", "num_leaves", "max_depth", "learning_rate", "subsample", "subsample_freq", "lambda_l1", "lambda_l2", "alpha", "ifw_lightgbm", "ifw_glmnet" ) LightRuleFit_fixed <- c("lambda", "objective") LightRuleFit_lightgbm_params <- c( "nrounds", "num_leaves", "max_depth", "learning_rate", "subsample", "subsample_freq", "lambda_l1", "lambda_l2", "objective" ) LightRuleFit_glmnet_params <- c("alpha", "lambda") #' @title LightRuleFitHyperparameters #' #' @description #' Hyperparameters subclass for LightRuleFit. #' #' @author EDG #' @keywords internal #' @noRd LightRuleFitHyperparameters <- new_class( name = "LightRuleFitHyperparameters", parent = Hyperparameters, constructor = function( nrounds, num_leaves, max_depth, learning_rate, subsample, subsample_freq, lambda_l1, lambda_l2, objective, ifw_lightgbm, # GLMNET alpha, lambda, ifw_glmnet, # IFW ifw ) { new_object( Hyperparameters( algorithm = "LightRuleFit", hyperparameters = list( nrounds = nrounds, num_leaves = num_leaves, max_depth = max_depth, learning_rate = learning_rate, subsample = subsample, subsample_freq = subsample_freq, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, objective = objective, ifw_lightgbm = ifw_lightgbm, # GLMNET alpha = alpha, lambda = lambda, ifw_glmnet = ifw_glmnet, # IFW ifw = ifw ), tunable_hyperparameters = LightRuleFit_tunable, fixed_hyperparameters = LightRuleFit_fixed ) ) } ) # /rtemis::LightRuleFitHyperparameters # %% setup_LightRuleFit ---- #' Setup LightRuleFit Hyperparameters #' #' Setup hyperparameters for LightRuleFit training. #' #' Get more information from [lightgbm::lgb.train]. #' #' @param nrounds (Tunable) Positive integer: Number of boosting rounds. #' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree. #' @param max_depth (Tunable) Integer: Maximum depth of trees. #' @param learning_rate (Tunable) Numeric: Learning rate. #' @param subsample (Tunable) Numeric: Fraction of data to use. #' @param subsample_freq (Tunable) Positive integer: Frequency of subsample. #' @param lambda_l1 (Tunable) Numeric: L1 regularization. #' @param lambda_l2 (Tunable) Numeric: L2 regularization. #' @param objective Character: Objective function. #' @param ifw_lightgbm (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the LightGBM #' step. #' @param objective Character: Objective function. #' @param alpha (Tunable) Numeric: Alpha for GLMNET. #' @param lambda Numeric: Lambda for GLMNET. #' @param ifw_glmnet (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the GLMNET step. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. This applies IFW #' to both LightGBM and GLMNET. #' #' @return LightRuleFitHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' lightrulefit_hyperparams <- setup_LightRuleFit(nrounds = 300L, max_depth = 3L) #' lightrulefit_hyperparams setup_LightRuleFit <- function( nrounds = 200L, num_leaves = 32L, max_depth = 4L, learning_rate = 0.1, subsample = 0.666, subsample_freq = 1L, lambda_l1 = 0, lambda_l2 = 0, objective = NULL, ifw_lightgbm = FALSE, alpha = 1, lambda = NULL, ifw_glmnet = FALSE, ifw = FALSE ) { nrounds <- clean_posint(nrounds) num_leaves <- clean_posint(num_leaves) max_depth <- clean_int(max_depth) check_floatpos1(learning_rate) check_floatpos1(subsample) subsample_freq <- clean_posint(subsample_freq) check_inherits(lambda_l1, "numeric") check_inherits(lambda_l2, "numeric") check_float01inc(alpha) check_inherits(lambda, "numeric") check_logical(ifw_lightgbm) check_logical(ifw_glmnet) check_logical(ifw) # If ifw, cannot have ifw_lightgbm or ifw_glmnet if (ifw) { if (ifw_lightgbm) { cli::cli_abort("Cannot set ifw and ifw_lightgbm at the same time.") } if (ifw_glmnet) { cli::cli_abort("Cannot set ifw and ifw_glmnet at the same time.") } } LightRuleFitHyperparameters( nrounds = nrounds, num_leaves = num_leaves, max_depth = max_depth, learning_rate = learning_rate, subsample = subsample, subsample_freq = subsample_freq, lambda_l1 = lambda_l1, lambda_l2 = lambda_l2, objective = objective, ifw_lightgbm = ifw_lightgbm, alpha = alpha, lambda = lambda, ifw_glmnet = ifw_glmnet, ifw = ifw ) } # /rtemis::setup_LightRuleFit # %% IsotonicHyperparameters ---- Isotonic_tunable <- character() Isotonic_fixed <- character() #' @title IsotonicHyperparameters #' #' @description #' Hyperparameters subclass for Isotonic Regression. #' #' @author EDG #' @keywords internal #' @noRd IsotonicHyperparameters <- new_class( name = "IsotonicHyperparameters", parent = Hyperparameters, constructor = function(ifw) { new_object( Hyperparameters( algorithm = "Isotonic", hyperparameters = list( ifw = ifw ), tunable_hyperparameters = "ifw", fixed_hyperparameters = Isotonic_fixed ) ) } ) # /rtemis::IsotonicHyperparameters # %% setup_Isotonic ---- #' Setup Isotonic Hyperparameters #' #' Setup hyperparameters for Isotonic Regression. #' #' There are not hyperparameters for this algorithm at this moment. #' #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return IsotonicHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' isotonic_hyperparams <- setup_Isotonic(ifw = TRUE) #' isotonic_hyperparams setup_Isotonic <- function(ifw = FALSE) { IsotonicHyperparameters(ifw = ifw) } # /rtemis::setup_Isotonic # %% SVMHyperparameters ---- #' @title SVMHyperparameters #' #' @description #' Hyperparameters subclass for SVM. #' #' @author EDG #' @keywords internal #' @noRd SVMHyperparameters <- new_class( name = "SVMHyperparameters", parent = Hyperparameters, constructor = function( hyperparameters, tunable_hyperparameters, fixed_hyperparameters ) { new_object( Hyperparameters( algorithm = "SVM", hyperparameters = hyperparameters, tunable_hyperparameters = tunable_hyperparameters, fixed_hyperparameters = fixed_hyperparameters ) ) } # /constructor ) # /rtemis::SVMHyperparameters # %% LinearSVMHyperparameters ---- LinearSVM_tunable <- c("cost", "ifw") LinearSVM_fixed <- character() #' @title LinearSVMHyperparameters #' #' @description #' Hyperparameters subclass for SVM with linear kernel. #' #' @author EDG #' @keywords internal #' @noRd LinearSVMHyperparameters <- new_class( name = "LinearSVMHyperparameters", parent = Hyperparameters, constructor = function(cost, ifw) { new_object( Hyperparameters( algorithm = "LinearSVM", hyperparameters = list( kernel = "linear", cost = cost, ifw = ifw ), tunable_hyperparameters = c("cost", "ifw"), fixed_hyperparameters = character() ) ) } # /constructor ) # /rtemis::LinearSVMHyperparameters # %% setup_LinearSVM ---- #' Setup LinearSVM Hyperparameters #' #' Setup hyperparameters for LinearSVM training. #' #' Get more information from [e1071::svm]. #' @param cost (Tunable) Numeric: Cost of constraints violation. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return LinearSVMHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' linear_svm_hyperparams <- setup_LinearSVM(cost = 0.5, ifw = TRUE) #' linear_svm_hyperparams setup_LinearSVM <- function( cost = 1, ifw = FALSE ) { check_inherits(cost, "numeric") check_logical(ifw) LinearSVMHyperparameters( cost = cost, ifw = ifw ) } # /setup_LinearSVM # Test that all SVM hyperparameters are set by setup_SVM stopifnot(all( c(LinearSVM_tunable, LinearSVM_fixed) %in% names(formals(setup_LinearSVM)) )) # %% RadialSVMHyperparameters ---- RadialSVM_tunable <- c("cost", "gamma", "ifw") RadialSVM_fixed <- character() #' @title RadialSVMHyperparameters #' #' @description #' Hyperparameters subclass for SVM with radial kernel. #' #' @author EDG #' @keywords internal #' @noRd RadialSVMHyperparameters <- new_class( name = "RadialSVMHyperparameters", parent = Hyperparameters, constructor = function(cost, gamma, ifw) { new_object( Hyperparameters( algorithm = "RadialSVM", hyperparameters = list( kernel = "radial", cost = cost, gamma = gamma, ifw = ifw ), tunable_hyperparameters = c("cost", "gamma", "ifw"), fixed_hyperparameters = character() ) ) } # /constructor ) # /rtemis::RadialSVMHyperparameters # %% setup_RadialSVM ---- #' Setup RadialSVM Hyperparameters #' #' Setup hyperparameters for RadialSVM training. #' #' Get more information from [e1071::svm]. #' #' @param cost (Tunable) Numeric: Cost of constraints violation. #' @param gamma (Tunable) Numeric: Kernel coefficient. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return RadialSVMHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' radial_svm_hyperparams <- setup_RadialSVM(cost = 10, gamma = 0.1, ifw = TRUE) #' radial_svm_hyperparams setup_RadialSVM <- function( cost = 1, gamma = 0.01, ifw = FALSE ) { check_inherits(cost, "numeric") check_inherits(gamma, "numeric") check_logical(ifw) RadialSVMHyperparameters( cost = cost, gamma = gamma, ifw = ifw ) } # /setup_RadialSVM # Test that all SVM hyperparameters are set by setup_SVM stopifnot(all( c(RadialSVM_tunable, RadialSVM_fixed) %in% names(formals(setup_RadialSVM)) )) # %% TabNetHyperparameters ---- tabnet_tunable <- c( "batch_size", "penalty", "clip_value", "loss", "epochs", "drop_last", "decision_width", "attention_width", "num_steps", "feature_reusage", "mask_type", "virtual_batch_size", "valid_split", "learn_rate", "optimizer", "lr_scheduler", "lr_decay", "step_size", "checkpoint_epochs", "cat_emb_dim", "num_independent", "num_shared", "num_independent_decoder", "num_shared_decoder", "momentum", "pretraining_ratio", "importance_sample_size", "early_stopping_monitor", "early_stopping_tolerance", "early_stopping_patience", "ifw" ) tabnet_fixed <- c("device", "num_workers", "skip_importance") #' @title TabNetHyperparameters #' #' @description #' Hyperparameters subclass for TabNet. #' #' @author EDG #' @keywords internal #' @noRd TabNetHyperparameters <- new_class( name = "TabNetHyperparameters", parent = Hyperparameters, constructor = function( batch_size, penalty, clip_value, loss, epochs, drop_last, decision_width, attention_width, num_steps, feature_reusage, mask_type, virtual_batch_size, valid_split, learn_rate, optimizer, lr_scheduler, lr_decay, step_size, checkpoint_epochs, cat_emb_dim, num_independent, num_shared, num_independent_decoder, num_shared_decoder, momentum, pretraining_ratio, device, importance_sample_size, early_stopping_monitor, early_stopping_tolerance, early_stopping_patience, num_workers, skip_importance, ifw ) { new_object( Hyperparameters( algorithm = "TabNet", hyperparameters = list( batch_size = batch_size, penalty = penalty, clip_value = clip_value, loss = loss, epochs = epochs, drop_last = drop_last, decision_width = decision_width, attention_width = attention_width, num_steps = num_steps, feature_reusage = feature_reusage, mask_type = mask_type, virtual_batch_size = virtual_batch_size, valid_split = valid_split, learn_rate = learn_rate, optimizer = optimizer, lr_scheduler = lr_scheduler, lr_decay = lr_decay, step_size = step_size, checkpoint_epochs = checkpoint_epochs, cat_emb_dim = cat_emb_dim, num_independent = num_independent, num_shared = num_shared, num_independent_decoder = num_independent_decoder, num_shared_decoder = num_shared_decoder, momentum = momentum, pretraining_ratio = pretraining_ratio, device = device, importance_sample_size = importance_sample_size, early_stopping_monitor = early_stopping_monitor, early_stopping_tolerance = early_stopping_tolerance, early_stopping_patience = early_stopping_patience, num_workers = num_workers, skip_importance = skip_importance, ifw = ifw ), tunable_hyperparameters = tabnet_tunable, fixed_hyperparameters = tabnet_fixed ) ) } # /constructor ) # /rtemis::TabNetHyperparameters # %% setup_TabNet ---- #' Setup TabNet Hyperparameters #' #' Setup hyperparameters for TabNet training. #' # Get more information from [tabnet::tabnet_config] #' #' @param batch_size (Tunable) Positive integer: Batch size. #' @param penalty (Tunable) Numeric: Regularization penalty. #' @param clip_value Numeric: Clip value. #' @param loss Character: Loss function. #' @param epochs (Tunable) Positive integer: Number of epochs. #' @param drop_last Logical: If TRUE, drop last batch. #' @param decision_width (Tunable) Positive integer: Decision width. #' @param attention_width (Tunable) Positive integer: Attention width. #' @param num_steps (Tunable) Positive integer: Number of steps. #' @param feature_reusage (Tunable) Numeric: Feature reusage. #' @param mask_type Character: Mask type. #' @param virtual_batch_size (Tunable) Positive integer: Virtual batch size. #' @param valid_split Numeric: Validation split. #' @param learn_rate (Tunable) Numeric: Learning rate. #' @param optimizer Character or torch function: Optimizer. #' @param lr_scheduler Character or torch function: "step", "reduce_on_plateau". #' @param lr_decay Numeric: Learning rate decay. #' @param step_size Positive integer: Step size. #' @param checkpoint_epochs (Tunable) Positive integer: Checkpoint epochs. #' @param cat_emb_dim (Tunable) Positive integer: Categorical embedding dimension. #' @param num_independent (Tunable) Positive integer: Number of independent Gated Linear Units (GLU) #' at each step of the encoder. #' @param num_shared (Tunable) Positive integer: Number of shared Gated Linear Units (GLU) at each #' step of the encoder. #' @param num_independent_decoder (Tunable) Positive integer: Number of independent GLU layers for #' pretraining. #' @param num_shared_decoder (Tunable) Positive integer: Number of shared GLU layers for #' pretraining. #' @param momentum (Tunable) Numeric: Momentum. #' @param pretraining_ratio (Tunable) Numeric: Pretraining ratio. #' @param device Character: Device "cpu" or "cuda". #' @param importance_sample_size Positive integer: Importance sample size. #' @param early_stopping_monitor Character: Early stopping monitor. "valid_loss", "train_loss", #' "auto". #' @param early_stopping_tolerance Numeric: Minimum relative improvement to reset the patience #' counter. #' @param early_stopping_patience Positive integer: Number of epochs without improving before #' stopping. #' @param num_workers Positive integer: Number of subprocesses for data loacding. #' @param skip_importance Logical: If TRUE, skip importance calculation. #' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. #' #' @return TabNetHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' tabnet_hyperparams <- setup_TabNet(epochs = 100L, learn_rate = 0.01) #' tabnet_hyperparams setup_TabNet <- function( batch_size = 1024^2, penalty = 0.001, clip_value = NULL, loss = "auto", epochs = 50L, drop_last = FALSE, decision_width = NULL, attention_width = NULL, num_steps = 3L, feature_reusage = 1.3, mask_type = "sparsemax", virtual_batch_size = 256^2, valid_split = 0, learn_rate = 0.02, optimizer = "adam", lr_scheduler = NULL, lr_decay = 0.1, step_size = 30, checkpoint_epochs = 10L, cat_emb_dim = 1L, num_independent = 2L, num_shared = 2L, num_independent_decoder = 1L, num_shared_decoder = 1L, momentum = 0.02, pretraining_ratio = 0.5, device = "auto", importance_sample_size = NULL, early_stopping_monitor = "auto", early_stopping_tolerance = 0, early_stopping_patience = 0, num_workers = 0L, skip_importance = FALSE, ifw = FALSE ) { TabNetHyperparameters( batch_size = batch_size, penalty = penalty, clip_value = clip_value, loss = loss, epochs = epochs, drop_last = drop_last, decision_width = decision_width, attention_width = attention_width, num_steps = num_steps, feature_reusage = feature_reusage, mask_type = mask_type, virtual_batch_size = virtual_batch_size, valid_split = valid_split, learn_rate = learn_rate, optimizer = optimizer, lr_scheduler = lr_scheduler, lr_decay = lr_decay, step_size = step_size, checkpoint_epochs = checkpoint_epochs, cat_emb_dim = cat_emb_dim, num_independent = num_independent, num_shared = num_shared, num_independent_decoder = num_independent_decoder, num_shared_decoder = num_shared_decoder, momentum = momentum, pretraining_ratio = pretraining_ratio, device = device, importance_sample_size = importance_sample_size, early_stopping_monitor = early_stopping_monitor, early_stopping_tolerance = early_stopping_tolerance, early_stopping_patience = early_stopping_patience, num_workers = num_workers, skip_importance = skip_importance, ifw = ifw ) } # /setup_TabNet # Test that all TabNet hyperparameters are set by setup_TabNet stopifnot(all( c(tabnet_tunable, tabnet_fixed) %in% names(formals(setup_TabNet)) )) get_tabnet_config <- function(hyperparameters) { check_is_S7(hyperparameters, TabNetHyperparameters) hpr <- hyperparameters@hyperparameters hpr[["ifw"]] <- NULL do.call(tabnet::tabnet_config, hpr) } # /get_tabnet_config # %% RangerHyperparameters ---- ranger_tunable <- c( "num_trees", "mtry", "min_node_size", "max_depth", "sample_fraction", "replace", "splitrule", "num_random_splits", "alpha", "minprop", "regularization_factor", "ifw" ) ranger_fixed <- c( "importance", "write_forest", "probability", "min_bucket", "case_weights", # set by train "class_weights", # set by train "poisson_tau", "split_select_weights", "always_split_variables", "respect_unordered_factors", "scale_permutation_importance", "local_importance", "regularization_usedepth", "keep_inbag", "inbag", "holdout", "quantreg", "time_interest", "oob_error", "save_memory", "verbose", "node_stats", "seed", "na_action" ) #' @title RangerHyperparameters #' #' @description #' Hyperparameters subclass for Ranger Random Forest. #' #' @author EDG #' @keywords internal #' @noRd RangerHyperparameters <- new_class( name = "RangerHyperparameters", parent = Hyperparameters, constructor = function( num_trees, mtry, importance, write_forest, probability, min_node_size, min_bucket, max_depth, replace, sample_fraction, case_weights, class_weights, splitrule, num_random_splits, alpha, minprop, poisson_tau, split_select_weights, always_split_variables, respect_unordered_factors, scale_permutation_importance, local_importance, regularization_factor, regularization_usedepth, keep_inbag, inbag, holdout, quantreg, time_interest, oob_error, save_memory, verbose, node_stats, seed, na_action, ifw ) { new_object( Hyperparameters( algorithm = "Ranger", hyperparameters = list( num_trees = num_trees, mtry = mtry, importance = importance, write_forest = write_forest, probability = probability, min_node_size = min_node_size, min_bucket = min_bucket, max_depth = max_depth, replace = replace, sample_fraction = sample_fraction, case_weights = case_weights, class_weights = class_weights, splitrule = splitrule, num_random_splits = num_random_splits, alpha = alpha, minprop = minprop, poisson_tau = poisson_tau, split_select_weights = split_select_weights, always_split_variables = always_split_variables, respect_unordered_factors = respect_unordered_factors, scale_permutation_importance = scale_permutation_importance, local_importance = local_importance, regularization_factor = regularization_factor, regularization_usedepth = regularization_usedepth, keep_inbag = keep_inbag, inbag = inbag, holdout = holdout, quantreg = quantreg, time_interest = time_interest, oob_error = oob_error, save_memory = save_memory, verbose = verbose, node_stats = node_stats, seed = seed, na_action = na_action, ifw = ifw ), tunable_hyperparameters = ranger_tunable, fixed_hyperparameters = ranger_fixed ) ) } # /constructor ) # /rtemis::RangerHyperparameters # %% setup_Ranger ---- #' Setup Ranger Hyperparameters #' #' Setup hyperparameters for Ranger Random Forest training. #' #' Get more information from [ranger::ranger]. #' #' @param num_trees (Tunable) Positive integer: Number of trees. #' @param mtry (Tunable) Positive integer: Number of features to consider at each split. #' @param importance Character: Variable importance mode. "none", "impurity", "impurity_corrected", "permutation". #' The "impurity" measure is the Gini index for classification, the variance of the responses for regression. #' @param write_forest Logical: Save ranger.forest object, required for prediction. Set to FALSE to reduce memory usage if no prediction intended. #' @param probability Logical: Grow a probability forest as in Malley et al. (2012). For classification only. #' @param min_node_size (Tunable) Positive integer: Minimal node size. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability. #' @param min_bucket Positive integer: Minimal number of samples in a terminal node. Only for survival. Deprecated in favor of min_node_size. #' @param max_depth (Tunable) Positive integer: Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree). #' @param replace Logical: Sample with replacement. #' @param sample_fraction (Tunable) Numeric: Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement. #' @param case_weights Numeric vector: Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees. #' @param class_weights Numeric vector: Weights for the outcome classes for classification. Vector of the same length as the number of classes, with names corresponding to the class labels. #' @param splitrule (Tunable) Character: Splitting rule. For classification: "gini", "extratrees", "hellinger". For regression: "variance", "extratrees", "maxstat", "beta". For survival: "logrank", "extratrees", "C", "maxstat". #' @param num_random_splits (Tunable) Positive integer: For "extratrees" splitrule: Number of random splits to consider for each candidate splitting variable. #' @param alpha (Tunable) Numeric: For "maxstat" splitrule: significance threshold to allow splitting. #' @param minprop (Tunable) Numeric: For "maxstat" splitrule: lower quantile of covariate distribution to be considered for splitting. #' @param poisson_tau Numeric: For "poisson" regression splitrule: tau parameter for Poisson regression. #' @param split_select_weights Numeric vector: Numeric vector with weights between 0 and 1, representing the probability to select variables for splitting. Alternatively, a list of size num_trees, with one weight vector per tree. #' @param always_split_variables Character vector: Character vector with variable names to be always selected in addition to the mtry variables tried for splitting. #' @param respect_unordered_factors Character or logical: Handling of unordered factor covariates. For "partition" all 2^(k-1)-1 possible partitions are considered for splitting, where k is the number of factor levels. For "ignore", all factor levels are ordered by their first occurrence in the data. For "order", all factor levels are ordered by their average response. TRUE corresponds to "partition" for the randomForest package compatibility. #' @param scale_permutation_importance Logical: Scale permutation importance by standard error as in (Breiman 2001). Only applicable if permutation variable importance mode selected. #' @param local_importance Logical: For permutation variable importance, use local importance as in Breiman (2001) and Liaw & Wiener (2002). #' @param regularization_factor (Tunable) Numeric: Regularization factor. Penalize variables with many split points. Requires splitrule = "variance". #' @param regularization_usedepth Logical: Use regularization factor with node depth. Requires regularization_factor. #' @param keep_inbag Logical: Save how often observations are in-bag in each tree. These will be used for (local) variable importance if inbag.counts in predict() is NULL. #' @param inbag List: Manually set observations per tree. List of size num_trees, containing inbag counts for each observation. Can be used for stratified sampling. #' @param holdout Logical: Hold-out mode. Hold-out all samples with case weight 0 and use these for variable importance and prediction error. #' @param quantreg Logical: Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). For regression only. Set keep_inbag = TRUE to prepare out-of-bag quantile prediction. #' @param time_interest Numeric: For GWAS data: SNP with this number will be used as time variable. Only for survival. Deprecated, use time.var in formula instead. #' @param oob_error Logical: Compute OOB prediction error. Set to FALSE to save computation time if only the forest is needed. #' @param save_memory Logical: Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems. #' @param verbose Logical: Show computation status and estimated runtime. #' @param node_stats Logical: Save additional node statistics. Only terminal nodes for now. #' @param seed Positive integer: Random seed. Default is NULL, which generates the seed from R. Set to 0 to ignore the R seed. #' @param na_action Character: Action to take if the data contains missing values. "na.learn" uses observations with missing values in splitting, treating missing values as a separate category. #' @param ifw Logical: Inverse Frequency Weighting for classification. If TRUE, class weights are set inversely proportional to the class frequencies. #' #' @return RangerHyperparameters object. #' #' @author EDG #' @export #' #' @examples #' ranger_hyperparams <- setup_Ranger(num_trees = 1000L, ifw = FALSE) #' ranger_hyperparams setup_Ranger <- function( num_trees = 500, mtry = NULL, importance = "impurity", write_forest = TRUE, probability = FALSE, min_node_size = NULL, min_bucket = NULL, max_depth = NULL, replace = TRUE, sample_fraction = ifelse(replace, 1, 0.632), case_weights = NULL, class_weights = NULL, splitrule = NULL, num_random_splits = 1, alpha = 0.5, minprop = 0.1, poisson_tau = 1, split_select_weights = NULL, always_split_variables = NULL, respect_unordered_factors = NULL, scale_permutation_importance = FALSE, local_importance = FALSE, regularization_factor = 1, regularization_usedepth = FALSE, keep_inbag = FALSE, inbag = NULL, holdout = FALSE, quantreg = FALSE, time_interest = NULL, oob_error = TRUE, save_memory = FALSE, verbose = TRUE, node_stats = FALSE, seed = NULL, na_action = "na.learn", ifw = FALSE ) { num_trees <- clean_posint(num_trees) mtry <- clean_posint(mtry) check_inherits(importance, "character") check_inherits(write_forest, "logical") check_inherits(probability, "logical") min_node_size <- clean_posint(min_node_size) min_bucket <- clean_posint(min_bucket) max_depth <- clean_posint(max_depth) check_inherits(replace, "logical") check_float01inc(sample_fraction) check_inherits(case_weights, "numeric") check_inherits(class_weights, "numeric") check_inherits(splitrule, "character") num_random_splits <- clean_posint(num_random_splits) check_float01inc(alpha) check_float01inc(minprop) check_inherits(poisson_tau, "numeric") check_inherits(split_select_weights, "numeric") check_inherits(always_split_variables, "character") check_inherits(respect_unordered_factors, "logical") check_inherits(scale_permutation_importance, "logical") check_inherits(local_importance, "logical") check_inherits(regularization_factor, "numeric") check_inherits(regularization_usedepth, "logical") check_inherits(keep_inbag, "logical") check_inherits(inbag, "list") check_inherits(holdout, "logical") check_inherits(quantreg, "logical") check_inherits(time_interest, "numeric") check_inherits(oob_error, "logical") check_inherits(save_memory, "logical") check_inherits(verbose, "logical") check_inherits(node_stats, "logical") check_inherits(seed, "numeric") check_inherits(na_action, "character") check_logical(ifw) RangerHyperparameters( num_trees = num_trees, mtry = mtry, importance = importance, write_forest = write_forest, probability = probability, min_node_size = min_node_size, min_bucket = min_bucket, max_depth = max_depth, replace = replace, sample_fraction = sample_fraction, case_weights = case_weights, class_weights = class_weights, splitrule = splitrule, num_random_splits = num_random_splits, alpha = alpha, minprop = minprop, poisson_tau = poisson_tau, split_select_weights = split_select_weights, always_split_variables = always_split_variables, respect_unordered_factors = respect_unordered_factors, scale_permutation_importance = scale_permutation_importance, local_importance = local_importance, regularization_factor = regularization_factor, regularization_usedepth = regularization_usedepth, keep_inbag = keep_inbag, inbag = inbag, holdout = holdout, quantreg = quantreg, time_interest = time_interest, oob_error = oob_error, save_memory = save_memory, verbose = verbose, node_stats = node_stats, seed = seed, na_action = na_action, ifw = ifw ) } # /setup_Ranger # Test that all Ranger hyperparameters are set by setup_Ranger stopifnot(all( c(ranger_tunable, ranger_fixed) %in% names(formals(setup_Ranger)) )) # %% .list_to_Hyperparameters ---- #' Convert a list to a Hyperparameters object #' #' Internal function used by `rtemis.server` to reconstruct a `Hyperparameters` #' object from a wire-format list. Not intended for direct use by end users. #' #' @param x Named list with two elements: #' \describe{ #' \item{`algorithm`}{Character: algorithm name, e.g. `"GLM"`, `"RF"`.} #' \item{`hyperparameters`}{Named list of hyperparameter name-value pairs #' passed to the corresponding `setup_()` function.} #' } #' #' @return A `Hyperparameters` object as returned by `setup_()`. #' #' @author EDG #' @keywords internal #' @export .list_to_Hyperparameters <- function(x) { fn <- paste0("setup_", x[["algorithm"]]) if (!exists(fn, mode = "function")) { cli::cli_abort(".val Invalid algorithm: {x[['algorithm']]}.") } args <- x[["hyperparameters"]] # Keep only arguments that are in the setup function setup_formals <- names(formals(get(fn))) args <- args[names(args) %in% setup_formals] do.call(fn, args) } ================================================ FILE: R/03_Metrics.R ================================================ # S7_Metrics.R # ::rtemis:: # 2025- EDG rtemis.org # %% Metrics ---- #' @title Metrics #' #' @description #' Superclass for Metrics metrics. #' #' @field sample Character: Sample name. #' @field metrics List or data.frame: Metrics. #' #' @author EDG #' @noRd Metrics <- new_class( name = "Metrics", properties = list( sample = class_character | NULL, metrics = class_list | class_data.frame ) ) # /rtemis::Metrics # %% `$`.Metrics ---- # Make Metrics@metrics `$`-accessible method(`$`, Metrics) <- function(x, name) { x@metrics[[name]] } # %% `.DollarNames`.Metrics ---- # `$`-autocomplete Metrics@metrics method(`.DollarNames`, Metrics) <- function(x, pattern = "") { all_names <- names(x@metrics) grep(pattern, all_names, value = TRUE) } # %% `[[`.Metrics ---- # Make Metrics@metrics `[[`-accessible method(`[[`, Metrics) <- function(x, name) { x@metrics[[name]] } # %% RegressionMetrics ---- #' @title RegressionMetrics #' #' @description #' Metrics subclass for regression models. #' #' @author EDG #' @noRd RegressionMetrics <- new_class( name = "RegressionMetrics", parent = Metrics, # properties = list( # MAE = class_numeric, # MSE = class_numeric, # RMSE = class_numeric, # Rsq = class_numeric # ), constructor = function(MAE, MSE, RMSE, Rsq, sample = NULL) { new_object( Metrics( sample = sample, metrics = data.frame( MAE = MAE, MSE = MSE, RMSE = RMSE, Rsq = Rsq ) ) ) } ) # /rtemis::RegressionMetrics # %% repr.RegressionMetrics ---- # Show RegressionMetrics ---- method(repr, RegressionMetrics) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) out <- if (!is.null(x@sample)) { repr_S7name( paste(x@sample, "Regression Metrics"), pad = pad, output_type = output_type ) } else { repr_S7name("Regression Metrics", pad = pad, output_type = output_type) } out <- paste0( out, repr_ls( x@metrics, print_class = FALSE, print_df = TRUE, pad = pad + 2L, output_type = output_type ) ) out } # /rtemis::repr.RegressionMetrics # %% print.RegressionMetrics ---- method(print, RegressionMetrics) <- function( x, pad = 0L, output_type = c("ansi", "html", "plain"), ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.RegressionMetrics # %% ClassificationMetrics ---- #' @title ClassificationMetrics #' #' @description #' Metrics subclass for classification models. #' #' @author EDG #' @keywords internal #' @noRd ClassificationMetrics <- new_class( name = "ClassificationMetrics", parent = Metrics, constructor = function( Confusion_Matrix, Overall, Class, Positive_Class, sample = NULL ) { new_object( Metrics( sample = sample, metrics = list( Confusion_Matrix = Confusion_Matrix, Overall = Overall, Class = Class, Positive_Class = Positive_Class ) ) ) } ) # /rtemis::ClassificationMetrics # %% repr.ClassificationMetrics ---- method(repr, ClassificationMetrics) <- function( x, decimal_places = 3L, pad = 0L, output_type = NULL, ... ) { output_type <- get_output_type(output_type) if (!is.null(x@sample)) { out <- repr_S7name( paste(x@sample, "Classification Metrics"), pad = pad, output_type = output_type ) } else { out <- repr_S7name( "Classification Metrics", pad = pad, output_type = output_type ) } # Confusion Matrix # suggestion: document 17 and 9 tblpad <- 17L - max(nchar(colnames(x@metrics[["Confusion_Matrix"]])), 9L) + pad out <- paste0( out, show_table(x[["Confusion_Matrix"]], pad = tblpad, output_type = output_type) ) out <- paste0( out, "\n", show_df( x@metrics[["Overall"]], pad = pad, transpose = TRUE, ddSci_dp = decimal_places, justify = "left", spacing = 2L, output_type = output_type ) ) if (is.na(x@metrics[["Positive_Class"]])) { out <- paste0( out, show_df( x@metrics[["Class"]], pad = pad, transpose = TRUE, ddSci_dp = decimal_places, justify = "left", spacing = 2, output_type = output_type ) ) } else { out <- paste0( out, "\n Positive Class ", fmt( x@metrics[["Positive_Class"]], col = highlight_col, bold = TRUE, output_type = output_type ), "\n" ) } out } # /rtemis::repr.ClassificationMetrics # %% print.ClassificationMetrics ---- method(print, ClassificationMetrics) <- function( x, decimal_places = 3, pad = 0L, output_type = c("ansi", "html", "plain"), ... ) { cat(repr( x, decimal_places = decimal_places, pad = pad, output_type = output_type )) invisible(x) } # /rtemis::print.ClassificationMetrics # %% MetricsRes ---- #' @title MetricsRes #' #' @description #' Superclass for MetricsRes metrics. #' #' @field sample Character: Sample name. #' #' @author EDG #' @noRd MetricsRes <- new_class( name = "MetricsRes", properties = list( sample = class_character | NULL, res_metrics = class_list, mean_metrics = class_data.frame, sd_metrics = class_data.frame ) ) # /rtemis::MetricsRes # %% repr.MetricsRes ---- method(repr, MetricsRes) <- function( x, decimal_places = 3L, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) type <- if (S7_inherits(x, RegressionMetricsRes)) { "Regression" } else { "Classification" } out <- repr_S7name( paste("Resampled", type, x@sample, "Metrics"), pad = pad, output_type = output_type ) out <- paste0(out, strrep(" ", pad)) out <- paste0( out, italic(" Showing mean (sd) across resamples.\n", output_type = output_type) ) # Create list with mean_metrics (sd_metrics) metricsl <- lapply(seq_along(x@mean_metrics), function(i) { paste0( ddSci(x@mean_metrics[[i]], decimal_places), gray( paste0(" (", ddSci(x@sd_metrics[[i]], decimal_places), ")"), output_type = output_type ) ) }) names(metricsl) <- names(x@mean_metrics) out <- paste0( out, repr_ls( metricsl, print_class = FALSE, print_df = TRUE, pad = pad + 2L, output_type = output_type ) ) out } # /rtemis::repr.MetricsRes # %% print.MetricsRes ---- method(print, MetricsRes) <- function( x, decimal_places = 3L, pad = 0L, output_type = NULL, ... ) { cat(repr(x, decimal_places, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.MetricsRes # %% RegressionMetricsRes ---- #' @author EDG #' @noRd RegressionMetricsRes <- new_class( name = "RegressionMetricsRes", parent = MetricsRes, constructor = function(sample, res_metrics) { new_object( MetricsRes( sample = sample, res_metrics = res_metrics, mean_metrics = vec2df( colMeans(do.call(rbind, lapply(res_metrics, function(x) x@metrics))) ), sd_metrics = vec2df( sapply(do.call(rbind, lapply(res_metrics, function(x) x@metrics)), sd) ) ) ) } ) # /rtemis::RegressionMetricsRes #' @author EDG #' @noRd ClassificationMetricsRes <- new_class( name = "ClassificationMetricsRes", parent = MetricsRes, constructor = function(sample, res_metrics) { new_object( MetricsRes( sample = sample, res_metrics = res_metrics, mean_metrics = vec2df( colMeans(do.call( rbind, lapply(res_metrics, function(x) x@metrics[["Overall"]]) )) ), sd_metrics = vec2df( sapply( do.call( rbind, lapply(res_metrics, function(x) x@metrics[["Overall"]]) ), sd ) ) ) ) } ) # /rtemis::ClassificationMetricsRes # %% repr.CalibratedClassification ---- #' @param x `ClassificationMetrics` before calibration. #' @param x_cal `ClassificationMetrics` after calibration. #' #' @author EDG #' #' @keywords internal #' @noRd repr_CalibratedClassificationMetrics <- function( x, x_cal, decimal_places = 2L, pad = 2L, output_type = NULL ) { output_type <- get_output_type(output_type) if (!is.null(x@sample)) { out <- repr_S7name( paste(x@sample, "Classification Metrics (Pre => Post Calibration)"), pad = pad, output_type = output_type ) } else { out <- repr_S7name( "Classification Metrics (Pre => Post Calibration)", pad = pad, output_type = output_type ) } # Confusion Matrix: Pre=>Post prepost_cm <- paste_tables( x@metrics[["Confusion_Matrix"]], x_cal@metrics[["Confusion_Matrix"]], sep = " => " ) tblpad <- 17L - max(nchar(colnames(prepost_cm)), 9L) + pad out <- paste0( out, show_table(prepost_cm, pad = tblpad, output_type = output_type) ) # Overall metrics: Pre=>Post # Note: decimal formatting handled by paste_dfs with decimal_places parameter out <- paste0( out, "\n", show_df( paste_dfs( x@metrics[["Overall"]], x_cal@metrics[["Overall"]], sep = " => ", decimal_places = decimal_places ), pad = pad, transpose = TRUE, ddSci_dp = NULL, justify = "left", spacing = 2L, output_type = output_type ) ) # Class metrics: Pre=>Post (for multiclass) or Positive Class (for binary) if (is.na(x@metrics[["Positive_Class"]])) { out <- paste0( out, show_df( paste_dfs( x@metrics[["Class"]], x_cal@metrics[["Class"]], decimal_places = decimal_places ), pad = pad, transpose = TRUE, ddSci_dp = NULL, justify = "left", spacing = 2, output_type = output_type ) ) } else { out <- paste0( out, "\n Positive Class ", fmt( x@metrics[["Positive_Class"]], col = highlight_col, bold = TRUE, output_type = output_type ), "\n" ) } out } # /rtemis::repr_CalibratedClassification # %% repr.CalibratedClassificationResMetrics ---- #' @param x `ClassificationMetricsRes` before calibration. #' @param x_cal `ClassificationMetricsRes` after calibration. #' #' @author EDG #' #' @keywords internal #' @noRd repr_CalibratedClassificationResMetrics <- function( x, x_cal, decimal_places = 2L, pad = 2L, output_type = NULL ) { output_type <- get_output_type(output_type) out <- repr_S7name( paste( "Resampled Classification", x@sample, "Metrics (Pre => Post Calibration)" ), pad = pad, output_type = output_type ) out <- paste0(out, strrep(" ", pad)) out <- paste0( out, italic( " Showing mean (sd) across resamples, Pre => Post calibration.\n", output_type = output_type ) ) # Create pre and post formatted strings: mean (sd) pre_strings <- lapply(seq_along(x@mean_metrics), function(i) { paste0( ddSci(x@mean_metrics[[i]], decimal_places), gray( paste0(" (", ddSci(x@sd_metrics[[i]], decimal_places), ")"), output_type = output_type ) ) }) names(pre_strings) <- names(x@mean_metrics) post_strings <- lapply(seq_along(x_cal@mean_metrics), function(i) { paste0( ddSci(x_cal@mean_metrics[[i]], decimal_places), gray( paste0(" (", ddSci(x_cal@sd_metrics[[i]], decimal_places), ")"), output_type = output_type ) ) }) names(post_strings) <- names(x_cal@mean_metrics) # Combine pre=>post prepost_strings <- lapply(seq_along(pre_strings), function(i) { paste(pre_strings[[i]], post_strings[[i]], sep = " => ") }) names(prepost_strings) <- names(pre_strings) out <- paste0( out, repr_ls( prepost_strings, print_class = FALSE, print_df = TRUE, pad = pad + 2L, output_type = output_type ) ) out } # /rtemis::repr_CalibratedClassificationResMetrics ================================================ FILE: R/04_Preprocessor.R ================================================ # S7_Preprocessor.R # ::rtemis:: # 2025- EDG rtemis.org # References # https://github.com/RConsortium/S7/ # https://rconsortium.github.io/S7 # %% PreprocessorConfig ---- #' @title PreprocessorConfig #' #' @description #' PreprocessorConfig class. #' #' @author EDG #' @noRd PreprocessorConfig <- new_class( name = "PreprocessorConfig", properties = list( complete_cases = class_logical, remove_features_thres = class_numeric | NULL, remove_cases_thres = class_numeric | NULL, missingness = class_logical, impute = class_logical, impute_type = class_character, impute_missRanger_params = class_list, impute_discrete = class_character, impute_continuous = class_character, integer2factor = class_logical, integer2numeric = class_logical, logical2factor = class_logical, logical2numeric = class_logical, numeric2factor = class_logical, numeric2factor_levels = class_character | NULL, numeric_cut_n = class_numeric, numeric_cut_labels = class_logical, numeric_quant_n = class_numeric, numeric_quant_NAonly = class_logical, unique_len2factor = class_numeric, character2factor = class_logical, factorNA2missing = class_logical, factorNA2missing_level = class_character, factor2integer = class_logical, factor2integer_startat0 = class_logical, scale = class_logical, center = class_logical, scale_centers = class_numeric | NULL, scale_coefficients = class_numeric | NULL, remove_constants = class_logical, remove_constants_skip_missing = class_logical, remove_duplicates = class_logical, remove_features = class_character | NULL, one_hot = class_logical, one_hot_levels = class_list | NULL, add_date_features = class_logical, date_features = class_character, add_holidays = class_logical, exclude = class_character | NULL ) ) # /PreprocessorConfig # %% names.PreprocessorConfig ---- # Names PreprocessorConfig method(names, PreprocessorConfig) <- function(x) { names(props(x)) } # %% `$`.PreprocessorConfig ---- # Make props `$`-accessible method(`$`, PreprocessorConfig) <- function(x, name) { props(x)[[name]] } # %% `.DollarNames`.PreprocessorConfig ---- # DollarSign tab-complete property names method(`.DollarNames`, PreprocessorConfig) <- function(x, pattern = "") { all_names <- names(props(x)) grep(pattern, all_names, value = TRUE) } # %% `[[`.PreprocessorConfig ---- # Make proprs `[[`-accessible method(`[[`, PreprocessorConfig) <- function(x, name) { props(x)[[name]] } # %% repr.PreprocessorConfig ---- method(repr, PreprocessorConfig) <- function( x, limit = -1L, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name("PreprocessorConfig", pad = pad, output_type = output_type), repr_ls(props(x), pad = pad, limit = limit, output_type = output_type) ) } # /rtemis::repr.PreprocessorConfig # %% print.PreprocessorConfig ---- method(print, PreprocessorConfig) <- function( x, limit = -1L, output_type = NULL, ... ) { cat(repr(x, limit = limit, output_type = output_type)) invisible(x) } # /rtemis::print.PreprocessorConfig # %% setup_Preprocessor ---- #' Setup Preprocessor #' #' @description #' Creates a `PreprocessorConfig` object, which can be used in [preprocess]. #' #' @param complete_cases Logical: If TRUE, only retain complete cases (no missing data). #' @param remove_cases_thres Float (0, 1): Remove cases with >= to this fraction #' of missing features. #' @param remove_features_thres Float (0, 1): Remove features with missing #' values in >= to this fraction of cases. #' @param missingness Logical: If TRUE, generate new boolean columns for each #' feature with missing values, indicating which cases were missing data. #' @param impute Logical: If TRUE, impute missing cases. See `impute_discrete` and #' `impute_continuous`. #' @param impute_type Character: Package to use for imputation. #' @param impute_missRanger_params Named list with elements "pmm.k" and #' "maxiter", which are passed to `missRanger::missRanger`. `pmm.k` #' greater than 0 results in predictive mean matching. Default `pmm.k = 3` #' `maxiter = 10` `num.trees = 500`. Reduce `num.trees` for #' faster imputation especially in large datasets. Set `pmm.k = 0` to #' disable predictive mean matching. #' @param impute_discrete Character: Name of function that returns single value: How to impute #' discrete variables for `impute_type = "meanMode"`. #' @param impute_continuous Character: Name of function that returns single value: How to impute #' continuous variables for `impute_type = "meanMode"`. #' @param integer2factor Logical: If TRUE, convert all integers to factors. This includes #' `bit64::integer64` columns. #' @param integer2numeric Logical: If TRUE, convert all integers to numeric #' (will only work if `integer2factor = FALSE`). This includes #' `bit64::integer64` columns. #' @param logical2factor Logical: If TRUE, convert all logical variables to #' factors. #' @param logical2numeric Logical: If TRUE, convert all logical variables to #' numeric. #' @param numeric2factor Logical: If TRUE, convert all numeric variables to #' factors. #' @param numeric2factor_levels Character vector: Optional - will be passed to #' `levels` arg of `factor()` if `numeric2factor = TRUE`. For advanced/ #' specific use cases; need to know unique values of numeric vector(s) and given all #' numeric vars have same unique values. #' @param numeric_cut_n Integer: If > 0, convert all numeric variables to factors by #' binning using `base::cut` with `breaks` equal to this number. #' @param numeric_cut_labels Logical: The `labels` argument of [base::cut]. #' @param numeric_quant_n Integer: If > 0, convert all numeric variables to factors by #' binning using `base::cut` with `breaks` equal to this number of quantiles. #' produced using `stats::quantile`. #' @param numeric_quant_NAonly Logical: If TRUE, only bin numeric variables with #' missing values. #' @param unique_len2factor Integer (>=2): Convert all variables with less #' than or equal to this number of unique values to factors. #' For example, if binary variables are encoded with 1, 2, you could use #' `unique_len2factor = 2` to convert them to factors. #' @param character2factor Logical: If TRUE, convert all character variables to #' factors. #' @param factorNA2missing Logical: If TRUE, make NA values in factors be of #' level `factorNA2missing_level`. In many cases this is the preferred way #' to handle missing data in categorical variables. Note that since this step #' is performed before imputation, you can use this option to handle missing #' data in categorical variables and impute numeric variables in the same #' `preprocess` call. #' @param factorNA2missing_level Character: Name of level if #' `factorNA2missing = TRUE`. #' @param factor2integer Logical: If TRUE, convert all factors to integers. #' @param factor2integer_startat0 Logical: If TRUE, start integer coding at 0. #' @param scale Logical: If TRUE, scale columns of `x`. #' @param center Logical: If TRUE, center columns of `x`. Note that by #' default it is the same as `scale`. #' @param scale_centers Named vector: Centering values for each feature. #' @param scale_coefficients Named vector: Scaling values for each feature. #' @param remove_constants Logical: If TRUE, remove constant columns. #' @param remove_constants_skip_missing Logical: If TRUE, skip missing values, before #' checking if feature is constant. #' @param remove_features Character vector: Features to remove. #' @param remove_duplicates Logical: If TRUE, remove duplicate cases. #' @param one_hot Logical: If TRUE, convert all factors using one-hot encoding. #' @param one_hot_levels List: Named list of the form "feature_name" = "levels". Used when applying #' one-hot encoding to validation or test data using `Preprocessor`. #' @param add_date_features Logical: If TRUE, extract date features from date columns. #' @param date_features Character vector: Features to extract from dates. #' @param add_holidays Logical: If TRUE, extract holidays from date columns. #' @param exclude Integer, vector: Exclude these columns from preprocessing. #' #' @section Order of Operations: #' #' * keep complete cases only #' * remove constants #' * remove duplicates #' * remove cases by missingness threshold #' * remove features by missingness threshold #' * integer to factor #' * integer to numeric #' * logical to factor #' * logical to numeric #' * numeric to factor #' * cut numeric to n bins #' * cut numeric to n quantiles #' * numeric with less than N unique values to factor #' * character to factor #' * factor NA to named level #' * add missingness column #' * impute #' * scale and/or center #' * one-hot encoding #' #' @return `PreprocessorConfig` object. #' #' @author EDG #' @export #' #' @examples #' preproc_config <- setup_Preprocessor(factorNA2missing = TRUE) #' preproc_config setup_Preprocessor <- function( complete_cases = FALSE, remove_features_thres = NULL, remove_cases_thres = NULL, missingness = FALSE, impute = FALSE, impute_type = c( "missRanger", "micePMM", "meanMode" ), impute_missRanger_params = list( pmm.k = 3, maxiter = 10, num.trees = 500 ), impute_discrete = "get_mode", impute_continuous = "mean", integer2factor = FALSE, integer2numeric = FALSE, logical2factor = FALSE, logical2numeric = FALSE, numeric2factor = FALSE, numeric2factor_levels = NULL, numeric_cut_n = 0, numeric_cut_labels = FALSE, numeric_quant_n = 0, numeric_quant_NAonly = FALSE, unique_len2factor = 0, character2factor = FALSE, factorNA2missing = FALSE, factorNA2missing_level = "missing", # nonzeroFactors = FALSE, factor2integer = FALSE, factor2integer_startat0 = TRUE, scale = FALSE, center = scale, scale_centers = NULL, scale_coefficients = NULL, remove_constants = FALSE, remove_constants_skip_missing = TRUE, remove_features = NULL, remove_duplicates = FALSE, one_hot = FALSE, one_hot_levels = NULL, # cleanfactorlevels = FALSE, add_date_features = FALSE, date_features = c("weekday", "month", "year"), add_holidays = FALSE, exclude = NULL ) { # Match args impute_type <- match.arg(impute_type) # Checks performed in the `PreprocessorConfig` constructor PreprocessorConfig( complete_cases = complete_cases, remove_features_thres = remove_features_thres, remove_cases_thres = remove_cases_thres, missingness = missingness, impute = impute, impute_type = impute_type, impute_missRanger_params = impute_missRanger_params, impute_discrete = impute_discrete, impute_continuous = impute_continuous, integer2factor = integer2factor, integer2numeric = integer2numeric, logical2factor = logical2factor, logical2numeric = logical2numeric, numeric2factor = numeric2factor, numeric2factor_levels = numeric2factor_levels, numeric_cut_n = numeric_cut_n, numeric_cut_labels = numeric_cut_labels, numeric_quant_n = numeric_quant_n, numeric_quant_NAonly = numeric_quant_NAonly, unique_len2factor = unique_len2factor, character2factor = character2factor, factorNA2missing = factorNA2missing, factorNA2missing_level = factorNA2missing_level, factor2integer = factor2integer, factor2integer_startat0 = factor2integer_startat0, scale = scale, center = center, scale_centers = scale_centers, scale_coefficients = scale_coefficients, remove_constants = remove_constants, remove_constants_skip_missing = remove_constants_skip_missing, remove_features = remove_features, remove_duplicates = remove_duplicates, one_hot = one_hot, one_hot_levels = one_hot_levels, add_date_features = add_date_features, date_features = date_features, add_holidays = add_holidays, exclude = exclude ) } # /setup_Preprocessor # Note: # data_dependent_props <- c( # "scale_centers", # Named vector with feature scaling centers. # "scale_coefficients", # Named vector with feature scaling coefficients. # "one_hot_levels", # Named list of the form "feature_name" = "levels". # "remove_features" # Character vector of feature names to remove. # ) # %% Preprocessor ---- #' @title Preprocessor #' #' @description #' Class to hold output of preprocessing values after applying `PreprocessorConfig` to #' training dataset, so that the same preprocessing can be applied to validation and test #' datasets. #' #' @field config `PreprocessorConfig` object. #' @field preprocessed Data frame or list: Preprocessed data. If a single data.frame is passed to #' `preprocess`, this will be a data.frame. If additional data sets are passed to the #' `dat_validation` and/or `dat_test` arguments, this will be a named list. #' @field values List: Data-dependent preprocessing values to be used for validation and test set #' preprocessing. #' #' @author EDG #' @noRd Preprocessor <- new_class( name = "Preprocessor", properties = list( config = PreprocessorConfig, preprocessed = class_data.frame | class_list, values = class_list ), constructor = function( config, preprocessed, scale_centers = NULL, scale_coefficients = NULL, one_hot_levels = NULL, remove_features = NULL ) { new_object( S7_object(), config = config, preprocessed = preprocessed, values = list( scale_centers = scale_centers, scale_coefficients = scale_coefficients, one_hot_levels = one_hot_levels, remove_features = remove_features ) ) } ) # /Preprocessor # %% repr.Preprocessor ---- method(repr, Preprocessor) <- function( x, pad = 0L, print_df = FALSE, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name("Preprocessor", pad = pad, output_type = output_type), repr_ls(props(x), pad = pad, print_df = print_df) ) } # /rtemis::repr.Preprocessor # %% print.Preprocessor ---- method(print, Preprocessor) <- function(x, pad = 0L, output_type = NULL, ...) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.Preprocessor # %% names.Preprocessor ---- method(names, Preprocessor) <- function(x) { names(props(x)) } # %% `$`.Preprocessor ---- # Make props `$`-accessible method(`$`, Preprocessor) <- function(x, name) { props(x)[[name]] } # %% `.DollarNames`.Preprocessor ---- # DollarSign tab-complete property names method(`.DollarNames`, Preprocessor) <- function(x, pattern = "") { all_names <- names(props(x)) grep(pattern, all_names, value = TRUE) } # %% `[`.Preprocessor ---- # Make props `[`-accessible method(`[`, Preprocessor) <- function(x, name) { props(x)[[name]] } # %% `[[`.Preprocessor ---- # Make props `[[`-accessible method(`[[`, Preprocessor) <- function(x, name) { props(x)[[name]] } # %% preprocessed.Preprocessor ---- method(preprocessed, Preprocessor) <- function(x) { x@preprocessed } ================================================ FILE: R/05_Resampler.R ================================================ # S7_Resampler.R # ::rtemis:: # 2025- EDG rtemis.org # References # https://github.com/RConsortium/S7/ # https://rconsortium.github.io/S7 # Description # `ResamplerConfig` class and subclasses create objects that store resampling configuration. # They are set by `setup_Resampler()` and perform type checking and validation. # They are used by `resample()`. # `Resampler` class stores resamples and their configuration. # `Resampler` objects are created by `resample()`. # Note: `id_strat` is used by `resample()`, not individual resamplers # %% ResamplerConfig ---- #' @title ResamplerConfig #' #' @description #' Superclass for resampler configuration. #' #' @field type Character: Type of resampler. #' @field n Integer: Number of resamples. #' #' @author EDG #' @noRd ResamplerConfig <- new_class( name = "ResamplerConfig", properties = list( type = class_character, n = class_integer # scalar_int_pos ), constructor = function(type, n) { # LOOCV does not have a defined number of resamples, so n can be NA_integer_ n <- clean_posint(n, allow_na = TRUE) new_object( S7_object(), type = type, n = n ) } ) # /rtemis::ResamplerConfig # %% `$`.ResamplerConfig ---- # Make S7 properties `$`-accessible method(`$`, ResamplerConfig) <- function(x, name) { prop(x, name) } # %% `[[`.ResamplerConfig ---- # Make S7 properties `[[`-accessible method(`[[`, ResamplerConfig) <- function(x, name) { prop(x, name) } # %% repr.ResamplerConfig ---- #' repr ResamplerConfig #' #' @author EDG #' @keywords internal #' @noRd method(repr, ResamplerConfig) <- function(x, pad = 0L, output_type = NULL) { output_type <- get_output_type(output_type) paste0( repr_S7name(x, pad = pad, output_type = output_type), repr_ls( props(x)[-1], pad = pad, print_class = FALSE, output_type = output_type ) ) } # /rtemis::repr.ResamplerConfig # %% print.ResamplerConfig ---- #' Print ResamplerConfig #' #' @description #' print ResamplerConfig object #' #' @param x ResamplerConfig object #' #' @author EDG #' @noRd method(print, ResamplerConfig) <- function( x, pad = 0L, output_type = c("ansi", "html", "plain"), ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.ResamplerConfig # %% desc.ResamplerConfig ---- method(desc, ResamplerConfig) <- function(x) { switch( x@type, KFold = paste0(x@n, " independent folds"), StratSub = paste0(x@n, " stratified subsamples"), StratBoot = paste0(x@n, " stratified bootstraps"), Bootstrap = paste0(x@n, " bootstrap resamples"), Custom = paste0(x@n, " custom resamples"), LOOCV = paste0(x@n, " leave-one-out folds"), paste0(x@n, " resamples") ) } # /rtemis::desc.ResamplerConfig # %% KFoldConfig ---- #' @title KFoldConfig #' #' @description #' ResamplerConfig subclass for k-fold resampling. #' #' @author EDG #' @noRd KFoldConfig <- new_class( name = "KFoldConfig", parent = ResamplerConfig, properties = list( stratify_var = class_character | NULL, strat_n_bins = scalar_int_pos, id_strat = class_vector | NULL, seed = scalar_int_pos ), constructor = function(n, stratify_var, strat_n_bins, id_strat, seed) { new_object( ResamplerConfig( type = "KFold", n = n ), stratify_var = stratify_var, strat_n_bins = strat_n_bins, id_strat = id_strat, seed = seed ) } ) # /rtemis::KFoldConfig # %% StratSubConfig ---- #' @title StratSubConfig #' #' @description #' ResamplerConfig subclass for stratified subsampling. #' #' @author EDG #' @noRd StratSubConfig <- new_class( name = "StratSubConfig", parent = ResamplerConfig, properties = list( n = scalar_int_pos, train_p = scalar_dbl_01excl, stratify_var = class_character | NULL, strat_n_bins = scalar_int_pos, id_strat = class_vector | NULL, seed = scalar_int_pos ), constructor = function( n, train_p, stratify_var, strat_n_bins, id_strat, seed ) { new_object( ResamplerConfig( type = "StratSub", n = n ), train_p = train_p, stratify_var = stratify_var, strat_n_bins = strat_n_bins, id_strat = id_strat, seed = seed ) } ) # /rtemis::StratSubConfig # %% StratBootConfig ---- #' @title StratBootConfig #' #' @description #' ResamplerConfig subclass for stratified bootstrapping. #' #' @author EDG #' @noRd StratBootConfig <- new_class( name = "StratBootConfig", parent = ResamplerConfig, properties = list( stratify_var = class_character | NULL, train_p = scalar_dbl_01excl, strat_n_bins = scalar_int_pos, target_length = scalar_int_pos, id_strat = class_vector | NULL, seed = scalar_int_pos ), constructor = function( n, stratify_var, train_p, strat_n_bins, target_length, id_strat, seed ) { new_object( ResamplerConfig( type = "StratBoot", n = n ), stratify_var = stratify_var, train_p = train_p, strat_n_bins = strat_n_bins, target_length = target_length, id_strat = id_strat, seed = seed ) } ) # /rtemis::StratBootConfig # %% BootstrapConfig ---- #' @title BootstrapConfig #' #' @description #' ResamplerConfig subclass for bootstrap resampling. #' #' @author EDG #' @noRd BootstrapConfig <- new_class( name = "BootstrapConfig", parent = ResamplerConfig, properties = list( id_strat = class_vector | NULL, seed = scalar_int_pos ), constructor = function(n, id_strat, seed) { new_object( ResamplerConfig( type = "Bootstrap", n = n ), id_strat = id_strat, seed = seed ) } ) # /rtemis::BootstrapConfig # %% LOOCVConfig ---- #' @title LOOCVConfig #' #' @description #' ResamplerConfig subclass for leave-one-out cross-validation. #' #' @author EDG #' @noRd LOOCVConfig <- new_class( name = "LOOCVConfig", parent = ResamplerConfig, constructor = function(n) { new_object( ResamplerConfig( type = "LOOCV", n = n ) ) } ) # /rtemis::LOOCVConfig # %% CustomConfig ---- #' @title CustomConfig #' #' @description #' ResamplerConfig subclass for custom resampling. #' #' @author EDG #' @noRd CustomConfig <- new_class( name = "CustomConfig", parent = ResamplerConfig, constructor = function(n) { new_object( ResamplerConfig( type = "Custom", n = n ) ) } ) # /rtemis::CustomConfig # %% setup_Resampler ---- #' Setup Resampler #' #' @param n_resamples Integer: Number of resamples to make. #' @param type Character: Type of resampler: "KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV" #' @param stratify_var Character: Variable to stratify by. #' @param train_p Float: Training set percentage. #' @param strat_n_bins Integer: Number of bins to stratify by. #' @param target_length Integer: Target length for stratified bootstraps. #' @param id_strat Integer: Vector of indices to stratify by. These may be, for example, case IDs #' if your dataset contains repeated measurements. By specifying this vector, you can ensure that #' each case can only be present in the training or test set, but not both. #' @param seed Integer: Random seed. #' @param verbosity Integer: Verbosity level. #' #' @return ResamplerConfig object. #' #' @author EDG #' @export #' #' @examples #' tenfold_resampler <- setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L) #' tenfold_resampler setup_Resampler <- function( n_resamples = 10L, type = c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"), # index = NULL, # group = NULL, stratify_var = NULL, train_p = .75, strat_n_bins = 4L, target_length = NULL, id_strat = NULL, seed = NULL, verbosity = 1L ) { # Arguments type <- match_arg( type, c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV") ) if (length(type) == 0) { cli::cli_abort( "Invalid resampler type. Must be one of: 'StratSub', 'StratBoot', 'KFold', 'Bootstrap', 'LOOCV'" ) } seed <- clean_int(seed) if (type == "KFold") { KFoldConfig( n = n_resamples, stratify_var = stratify_var, strat_n_bins = strat_n_bins, id_strat = id_strat, seed = seed ) } else if (type == "StratSub") { StratSubConfig( n = n_resamples, train_p = train_p, stratify_var = stratify_var, strat_n_bins = strat_n_bins, id_strat = id_strat, seed = seed ) } else if (type == "StratBoot") { StratBootConfig( n = n_resamples, train_p = train_p, stratify_var = stratify_var, strat_n_bins = strat_n_bins, target_length = target_length, id_strat = id_strat, seed = seed ) } else if (type == "Bootstrap") { BootstrapConfig( n = n_resamples, id_strat = id_strat, seed = seed ) } else if (type == "LOOCV") { LOOCVConfig( n = NA_integer_ ) } else { cli::cli_abort(paste( "Resampler'", type, "'is not supported.", "Supported types are: 'KFold', 'StratSub', 'StratBoot', 'Bootstrap', 'LOOCV'." )) } } # /rtemis::setup_Resampler # %% Resampler ---- #' @title Resampler #' #' @description #' Class for resampling objects. #' #' @author EDG #' @noRd Resampler <- new_class( name = "Resampler", properties = list( type = class_character, resamples = class_list, config = ResamplerConfig ) ) # /rtemis::Resampler # %% repr.Resampler ---- #' repr Resampler #' #' @author EDG #' @keywords internal #' @noRd method(repr, Resampler) <- function(x, pad = 0L, output_type = NULL) { output_type <- get_output_type(output_type) paste0( repr_S7name(x, pad = pad, output_type = output_type), repr_ls( props(x), pad = pad, print_class = FALSE, output_type = output_type ) ) } # /rtemis::repr.Resampler # %% print.Resampler ---- method(print, Resampler) <- function( x, output_type = c("ansi", "html", "plain"), ... ) { cat(repr(x, output_type = output_type)) invisible(x) } # %% names.Resampler ---- method(names, Resampler) <- function(x) { names(x@resamples) } # %% `$`.Resampler ---- # Access Resampler$resamples resamples using `$` ---- method(`$`, Resampler) <- function(x, name) { x@resamples[[name]] } # %% `.DollarNames`.Resampler ---- # DollarSign tab-complete Resampler@resamples names method(`.DollarNames`, Resampler) <- function(x, pattern = "") { all_names <- names(x@resamples) grep(pattern, all_names, value = TRUE) } # %% `[[`.Resampler ---- # Access Resampler$resamples resamples using `[[` ---- method(`[[`, Resampler) <- function(x, index) { x@resamples[[index]] } # %% desc.Resampler ---- method(desc, Resampler) <- function(x) { desc(x@config) } # %% --- Internal functions ---- # %% .list_to_ResamplerConfig ---- #' Convert a list to a ResamplerConfig object #' #' Internal function used by `rtemis.server` and `SuperConfig` deserialization #' to reconstruct a `ResamplerConfig` object from a named list. Not intended #' for direct use by end users. #' #' @param x Named list with the following elements: #' \describe{ #' \item{`type`}{Character: resampler type — one of `"KFold"`, #' `"StratSub"`, `"StratBoot"`, `"Bootstrap"`, `"LOOCV"`, `"Custom"`.} #' \item{`n`}{Integer: number of resamples (not used for `"LOOCV"`).} #' \item{`train_p`}{Numeric: training proportion (used by `"StratSub"` and #' `"StratBoot"`).} #' \item{`stratify_var`}{Character or `NULL`: stratification variable name.} #' \item{`strat_n_bins`}{Integer: number of bins for stratification.} #' \item{`target_length`}{Integer or `NULL`: target resample length #' (`"StratBoot"` only).} #' \item{`id_strat`}{Character or `NULL`: ID stratification variable.} #' \item{`seed`}{Integer or `NULL`: random seed.} #' } #' #' @return A `ResamplerConfig` object of the appropriate subtype. #' #' @author EDG #' @keywords internal #' @export .list_to_ResamplerConfig <- function(x) { switch( x[["type"]], KFold = KFoldConfig( n = x[["n"]], stratify_var = x[["stratify_var"]], strat_n_bins = x[["strat_n_bins"]], id_strat = x[["id_strat"]], seed = x[["seed"]] ), StratSub = StratSubConfig( n = x[["n"]], train_p = x[["train_p"]], stratify_var = x[["stratify_var"]], strat_n_bins = x[["strat_n_bins"]], id_strat = x[["id_strat"]], seed = x[["seed"]] ), StratBoot = StratBootConfig( n = x[["n"]], train_p = x[["train_p"]], stratify_var = x[["stratify_var"]], strat_n_bins = x[["strat_n_bins"]], target_length = x[["target_length"]], id_strat = x[["id_strat"]], seed = x[["seed"]] ), Bootstrap = BootstrapConfig( n = x[["n"]], id_strat = x[["id_strat"]], seed = x[["seed"]] ), LOOCV = LOOCVConfig( n = NA_integer_ ), Custom = CustomConfig( n = x[["n"]] ) ) } # /rtemis::.list_to_ResamplerConfig ================================================ FILE: R/06_Tuner.R ================================================ # S7_Tuner.R # ::rtemis:: # 2025- EDG rtemis.org # References # S7 # https://github.com/RConsortium/S7/ # https://rconsortium.github.io/S7 # future # https://www.futureverse.org/backends.html # Description # `TunerConfig` class and subclasses create objects that store tuner config. # They are set by `setup_GridSearch()` and perform type checking and validation. # They are used by `tune()`. # `Tuner` class and subclasses create objects that store tuning results. # They are created by `tune()`. # Dev # Should both class constructors (e.g. GridSearch@constructor) and setup functions # (e.g. setup_GridSearch) perform type checking and validation? # %% TunerConfig ---- #' TunerConfig #' #' Superclass for tuner config. #' #' @field type Character: Type of tuner. #' @field config Named list of tuner config. #' #' @author EDG #' @noRd TunerConfig <- new_class( name = "TunerConfig", properties = list( type = class_character, config = class_list ) ) # /rtemis::TunerConfig # %% repr.TunerConfig ---- method(repr, TunerConfig) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name( paste(x@type, "TunerConfig"), pad = pad, output_type = output_type ), repr_ls(x@config, pad = pad, output_type = output_type) ) } # /rtemis::repr.TunerConfig # %% print.TunerConfig ---- method(print, TunerConfig) <- function(x, pad = 0L, ...) { cat(repr(x, pad = pad), "\n") invisible(x) } # %% desc.TunerConfig ---- method(desc, TunerConfig) <- function(x) { if (x@type == "GridSearch") { paste(x@config[["search_type"]], "grid search") } } # %% `$`.TunerConfig ---- # Make TunerConfig@config `$`-accessible method(`$`, TunerConfig) <- function(x, name) { x@config[[name]] } # %% `.DollarNames`.TunerConfig ---- # `$`-autocomplete TunerConfig@config method(`.DollarNames`, TunerConfig) <- function(x, pattern = "") { all_names <- names(x@config) grep(pattern, all_names, value = TRUE) } # %% `[[`.TunerConfig ---- # Make TunerConfig@config `[[`-accessible method(`[[`, TunerConfig) <- function(x, name) { x@config[[name]] } # %% GridSearchConfig ---- #' @title GridSearchConfig #' #' @description #' TunerConfig subclass for grid search config. #' #' @author EDG #' @noRd GridSearchConfig <- new_class( name = "GridSearchConfig", parent = TunerConfig, constructor = function( resampler_config = NULL, search_type = NULL, randomize_p = NULL, metrics_aggregate_fn = NULL, metric = NULL, maximize = NULL ) { check_is_S7(resampler_config, ResamplerConfig) check_inherits(search_type, "character") check_float01exc(randomize_p) check_character(metrics_aggregate_fn) check_inherits(metric, "character") check_inherits(maximize, "logical") # Only assign randomize_p if search_type is "randomized" params <- list( search_type = search_type, resampler_config = resampler_config, metrics_aggregate_fn = metrics_aggregate_fn, metric = metric, maximize = maximize ) if (search_type == "randomized") { params[["randomize_p"]] <- randomize_p } new_object( TunerConfig( type = "GridSearch", config = params ) ) } ) # /rtemis::GridSearchConfig # %% setup_GridSearch ---- #' Setup Grid Search Config #' #' Create a `GridSearchConfig` object that can be passed to [train]. #' #' @param resampler_config `ResamplerConfig` set by [setup_Resampler]. #' @param search_type Character: "exhaustive" or "randomized". Type of #' grid search to use. Exhaustive search will try all combinations of #' config. Randomized will try a random sample of size #' `randomize_p` * `N of total combinations` #' @param randomize_p Float (0, 1): For `search_type == "randomized"`, #' randomly test this proportion of combinations. #' @param metrics_aggregate_fn Character: Name of function to use to aggregate error metrics. #' @param metric Character: Metric to minimize or maximize. #' @param maximize Logical: If TRUE, maximize `metric`, otherwise minimize it. #' #' @return A `GridSearchConfig` object. #' #' @author EDG #' @export #' #' @examples #' gridsearch_config <- setup_GridSearch( #' resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"), #' search_type = "exhaustive" #' ) #' gridsearch_config setup_GridSearch <- function( resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"), search_type = "exhaustive", randomize_p = NULL, metrics_aggregate_fn = "mean", metric = NULL, maximize = NULL ) { # Arguments ---- check_is_S7(resampler_config, ResamplerConfig) check_inherits(search_type, "character") check_float01exc(randomize_p) if (search_type == "exhaustive" && !is.null(randomize_p)) { cli::cli_abort("search_type is 'exhaustive': do not set randomize_p.") } # check_inherits(metrics_aggregate_fn, "function") check_character(metrics_aggregate_fn) check_inherits(metric, "character") check_inherits(maximize, "logical") GridSearchConfig( resampler_config = resampler_config, search_type = search_type, randomize_p = randomize_p, metrics_aggregate_fn = metrics_aggregate_fn, metric = metric, maximize = maximize ) } # /rtemis::setup_GridSearch # %% Tuner ---- #' Tuner Class #' #' @field type Character: Type of tuner. #' @field hyperparameters Named list of tunable and fixed hyperparameters. #' @field tuning_results Data.frame: Tuning results. #' @field best_hyperparameters Named list of best hyperparameter values. Includes only #' hyperparameters that were tuned. #' #' @author EDG #' @noRd Tuner <- new_class( name = "Tuner", properties = list( type = class_character, hyperparameters = Hyperparameters, tuner_config = TunerConfig, tuning_results = class_list, # with 2 elements: metrics_training, metrics_validation best_hyperparameters = class_list ) ) # /rtemis::Tuner # %% desc.Tuner ---- method(desc, Tuner) <- function(x) { if (x@type == "GridSearch") { paste(x@tuner_config[["search_type"]], "grid search") } } # /rtemis::describe.Tuner # %% GridSearch ---- #' GridSearch Class #' #' Tuner subclass for grid search. #' #' @author EDG #' @noRd GridSearch <- new_class( name = "GridSearch", parent = Tuner, constructor = function( hyperparameters, tuner_config, tuning_results, best_hyperparameters ) { type <- "GridSearch" new_object( Tuner( type = type, hyperparameters = hyperparameters, tuner_config = tuner_config, tuning_results = tuning_results, best_hyperparameters = best_hyperparameters ) ) } ) # /rtemis::GridSearch # print.GridSearch ---- #' Print GridSearch #' #' Print GridSearch object #' #' @param x GridSearch object. #' @param header Logical: If TRUE, print header with type of tuner. #' @param ... Not used. #' #' @author EDG #' @noRd method(print, GridSearch) <- function(x, header = TRUE, ...) { if (header) { objcat(paste(x@type)) } type <- if (x@tuner_config[["search_type"]] == "exhaustive") { "An exhaustive grid search" } else { paste0( "A randomized grid search (p = ", x@tuner_config[["randomize_p"]], ")" ) } n_param_combs <- NROW(x@tuning_results[["param_grid"]]) cat( type, " of ", singorplu(n_param_combs, "parameter combination"), " was performed using ", desc(x@tuner_config[["resampler_config"]]), ".\n", sep = "" ) cat( x@tuner_config[["metric"]], "was", ifelse(x@tuner_config[["maximize"]], "maximized", "minimized"), "with the following config:\n" ) printls(x@best_hyperparameters) invisible(x) } # /rtemis::print.GridSearch # %% repr.GridSearch ---- method(repr, GridSearch) <- function( x, header = TRUE, pad = 0L, output_type = c("ansi", "html", "plain"), ... ) { output_type <- match.arg(output_type) out <- character() if (header) { out <- paste0(out, repr_S7name(x@type, pad = pad), "\n") } type <- if (x@tuner_config[["search_type"]] == "exhaustive") { "An exhaustive grid search" } else { paste0( "A randomized grid search (p = ", x@tuner_config[["randomize_p"]], ")" ) } n_param_combs <- NROW(x@tuning_results[["param_grid"]]) out <- paste0( out, type, " of ", singorplu(n_param_combs, "parameter combination"), " was performed using ", desc(x@tuner_config[["resampler_config"]]), ".\n" ) out <- paste( out, x@tuner_config[["metric"]], "was", ifelse(x@tuner_config[["maximize"]], "maximized", "minimized"), "with the following config:\n" ) out <- paste( out, repr_ls(x@best_hyperparameters, pad = pad, output_type = output_type), sep = "" ) out } # /rtemis::repr.GridSearch # %% .list_to_TunerConfig ---- #' Convert a list to a TunerConfig object #' #' Internal function used by `rtemis.server` and `SuperConfig` deserialization #' to reconstruct a `TunerConfig` object from a named list. Not intended for #' direct use by end users. #' #' @param x Named list with two elements: #' \describe{ #' \item{`type`}{Character: tuner type. Currently only `"GridSearch"` is #' supported.} #' \item{`config`}{Named list of tuner configuration fields. For #' `"GridSearch"`: `resampler_config` (a list accepted by #' [.list_to_ResamplerConfig()]), `search_type`, `randomize_p`, #' `metrics_aggregate_fn`, `metric`, and `maximize`.} #' } #' #' @return A `TunerConfig` object (currently a `GridSearchConfig`). #' #' @author EDG #' @keywords internal #' @export .list_to_TunerConfig <- function(x) { if (x[["type"]] == "GridSearch") { setup_GridSearch( resampler_config = .list_to_ResamplerConfig(x[["config"]][[ "resampler_config" ]]), search_type = x[["config"]][["search_type"]], randomize_p = x[["config"]][["randomize_p"]], metrics_aggregate_fn = x[["config"]][["metrics_aggregate_fn"]], metric = x[["config"]][["metric"]], maximize = x[["config"]][["maximize"]] ) } else { cli::cli_abort("Unsupported tuner type: {x[['type']]}") } } # /rtemis::.list_to_TunerConfig ================================================ FILE: R/07_Supervised.R ================================================ # S7_Supervised.R # ::rtemis:: # 2025- EDG rtemis.org # References # https://github.com/RConsortium/S7/ # https://rconsortium.github.io/S7 # https://rconsortium.github.io/S7/articles/classes-objects.html?q=computed#computed-properties # https://utf8-icons.com/ # %% VariableImportance ---- #' @title VariableImportance #' #' @description #' Class for variable importance objects. Allows for one or more variable importance measures, #' stored in a data.table with columns "variable", and at least one #' more column with a descriptive name. #' #' @author EDG #' @noRd VariableImportance <- new_class( name = "VariableImportance", properties = list( data = class_data.table ), validator = function(self) { # Must include at least two columns if (NCOL(self@data) < 2L) { cli::cli_abort( "Variable importance data must include at least two columns: 'variable' and at least one importance measure." ) } # Must include column "variable" of type character if (!"variable" %in% names(self@data)) { cli::cli_abort( "Variable importance data must include a 'variable' column." ) } if (!is.character(self@data[["variable"]])) { cli::cli_abort("Column 'variable' must be of type character.") } # All other columns must be numeric other_cols <- setdiff(names(self@data), "variable") if (!all(self@data[, sapply(.SD, is.numeric), .SDcols = other_cols])) { cli::cli_abort( "All columns other than 'variable' must be numeric." ) } # Number of rows will be checked by Supervised to be at least as many as # the number of predictors. } ) # /rtemis::VariableImportance # %% repr.VariableImportance ---- method(repr, VariableImportance) <- function(x, pad = 0L, output_type = NULL) { output_type <- get_output_type(output_type) # "N variable importance measures for M predictors" n_m <- NCOL(x@data) - 1L paste0( repr_S7name("VariableImportance", pad = pad, output_type = output_type), strrep(" ", pad), fmt(n_m, col = highlight_col, bold = TRUE, output_type = output_type), ngettext( n_m, " variable importance measure for ", " variable importance measures for " ), fmt( NROW(x@data), col = highlight_col, bold = TRUE, output_type = output_type ), ngettext(NROW(x@data), " predictor", " predictors") ) } # /rtemis::repr.VariableImportance # %% print.VariableImportance ---- method(print, VariableImportance) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type), "\n") invisible(x) } # /rtemis::print.VariableImportance # Plot methods # Supervised: plot_varimp # SupervisedRes: plot_varimp, plot_metric # Regression: plot_true_pred, # Classification: plot_true_pred, plot_roc # RegressionRes: plot_metric, plot_true_pred, # ClassificationRes: plot_metric, plot_true_pred, plot_roc # %% Supervised ---- #' @title Supervised #' #' @description #' Superclass for supervised learning models. #' #' @author EDG #' @noRd Supervised <- new_class( name = "Supervised", properties = list( algorithm = class_character, model = class_any, type = class_character, preprocessor = Preprocessor | NULL, preprocessor_internal = Preprocessor | NULL, hyperparameters = Hyperparameters | NULL, tuner = Tuner | NULL, execution_config = ExecutionConfig, y_training = class_any, y_validation = class_any, y_test = class_any, predicted_training = class_any, predicted_validation = class_any, predicted_test = class_any, metrics_training = Metrics, metrics_validation = Metrics | NULL, metrics_test = Metrics | NULL, xnames = class_character, varimp = VariableImportance | NULL, question = class_character | NULL, extra = class_any, session_info = class_any ), constructor = function( algorithm, model, type, preprocessor, preprocessor_internal, hyperparameters, tuner, execution_config, y_training, y_validation, y_test, predicted_training, predicted_validation, predicted_test, metrics_training, metrics_validation, metrics_test, xnames, varimp, question, extra ) { new_object( S7_object(), algorithm = algorithm, model = model, type = type, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = y_training, y_validation = y_validation, y_test = y_test, predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, metrics_training = metrics_training, metrics_validation = metrics_validation, metrics_test = metrics_test, xnames = xnames, varimp = varimp, question = question, extra = extra, session_info = sessionInfo() ) } ) # /rtemis::Supervised # %% predict.Supervised ---- #' Predict `Supervised` #' #' Predict Method for `Supervised` objects #' #' @param object `Supervised` object. #' @param newdata data.frame or similar: New data to predict. #' #' @noRd method(predict, Supervised) <- function(object, newdata, verbosity = 1L, ...) { check_inherits(newdata, "data.frame") # Apply user-specified preprocessor if available if (!is.null(object@preprocessor)) { newdata <- preprocess( newdata, object@preprocessor, verbosity = verbosity ) |> preprocessed() } # Apply algorithm-specific preprocessor if available if (!is.null(object@preprocessor_internal)) { newdata <- preprocess( newdata, object@preprocessor_internal, verbosity = verbosity ) |> preprocessed() } # After preprocessing, enforce strict predictor names and order if (!identical(names(newdata), object@xnames)) { extra_cols <- setdiff(names(newdata), object@xnames) missing_cols <- setdiff(object@xnames, names(newdata)) cli::cli_abort(c( "x" = "Predictor names and order in newdata must exactly match training data.", "i" = "Expected {length(object@xnames)} columns; got {NCOL(newdata)}.", "i" = if (length(extra_cols) > 0L) { paste0("Unexpected columns: ", paste(extra_cols, collapse = ", ")) } else { "Unexpected columns: none." }, "i" = if (length(missing_cols) > 0L) { paste0("Missing columns: ", paste(missing_cols, collapse = ", ")) } else { "Missing columns: none." } )) } # Call predict_super with fully preprocessed data predict_super( model = object@model, newdata = newdata, type = object@type, verbosity = verbosity ) } # /rtemis::predict.Supervised # %% fitted.Supervised ---- #' Fitted `Supervised` #' #' Fitted Method for `Supervised` objects #' #' @param object `Supervised` object. #' #' @keywords internal #' @noRd method(fitted, Supervised) <- function(object, ...) { object@predicted_training } # /rtemis::fitted.Supervised # %% se.Supervised ---- #' Standard Error `Supervised` #' #' Standard Error Method for `Supervised` objects #' #' @param object `Supervised` object. #' #' @keywords internal #' @noRd method(se, Supervised) <- function(x, ...) { x@se_training } # %% `$`.Supervised ---- # Make Supervised props `$`- accessible method(`$`, Supervised) <- function(x, name) { prop(x, name) } # %% `.DollarNames`.Supervised ---- # `$`-autocomplete Supervised props method(`.DollarNames`, Supervised) <- function(x, pattern = "") { all_names <- names(props(x)) grep(pattern, all_names, value = TRUE) } # %% `[[`.Supervised ---- # Make Supervised props `[[`- accessible ---- method(`[[`, Supervised) <- function(x, name) { prop(x, name) } # %% repr.Supervised ---- #' repr `Supervised` #' #' @param x `Supervised` object. #' #' @author EDG #' #' @keywords internal #' @noRd method(repr, Supervised) <- function( x, pad = 0L, output_type = NULL, filename = NULL ) { output_type <- get_output_type(output_type, filename) # Class name out <- paste0( repr_S7name(x@type, pad = pad, output_type = output_type), highlight(x@algorithm, output_type = output_type), " (", desc_alg(x@algorithm), ")\n" ) # Tuning, if available if (!is.null(x@tuner)) { out <- paste0( out, fmt( "\U2699", col = col_tuner, bold = TRUE, pad = pad, output_type = output_type ), " Tuned using ", desc(x@tuner), ".\n" ) } # Calibration, if available if (prop_exists(x, "calibration_model")) { out <- paste0( out, fmt( "\U27CB", col = col_calibrator, bold = TRUE, pad = pad, output_type = output_type ), " Calibrated using ", desc_alg(x@calibration_model@algorithm), ".\n" ) } out <- paste0(out, "\n") # {Regression, Classification} vs. CalibratedClassification if (prop_exists(x, "calibration_model")) { # CalibratedClassification # Metrics, training out <- paste0( out, # repr(x@metrics_training, pad = 2L, output_type = output_type) repr_CalibratedClassificationMetrics( x@metrics_training, x@metrics_training_calibrated, pad = pad + 2L, output_type = output_type ) ) # Metrics, validation if (length(x@metrics_validation) > 0) { out <- paste0( out, repr_CalibratedClassificationMetrics( x@metrics_validation, x@metrics_validation_calibrated, pad = pad + 2L, output_type = output_type ) ) } # Metrics, test if (length(x@metrics_test) > 0) { out <- paste0( out, "\n", repr_CalibratedClassificationMetrics( x@metrics_test, x@metrics_test_calibrated, pad = pad + 2L, output_type = output_type ) ) } } else { # {Regression, Classification} # Metrics, training out <- paste0( out, repr(x@metrics_training, pad = pad + 2L, output_type = output_type) ) # Metrics, validation if (length(x@metrics_validation) > 0) { out <- paste0( out, repr(x@metrics_validation, pad = pad + 2L, output_type = output_type) ) } # Metrics, test if (length(x@metrics_test) > 0) { out <- paste0( out, "\n", repr(x@metrics_test, pad = pad + 2L, output_type = output_type) ) } } out } # /rtemis::repr.Supervised # %% to_json.Supervised ---- #' to_json `Supervised` #' #' Convert a `Supervised` (or `Regression` / `Classification` / #' `CalibratedClassification`) object to a JSON-serializable list. Excludes #' the model object, full prediction vectors, full outcome vectors, the #' R session_info, and `extra` — all of which are either not JSON-friendly, #' too large for the control-plane response, or fetched separately as #' Arrow IPC bulk data. #' #' @param x `Supervised` object. #' #' @return Named list. Pass to `jsonlite::toJSON(auto_unbox = TRUE)`. #' #' @author EDG #' @keywords internal #' @noRd method(to_json, Supervised) <- function(x, ...) { # Use `.to_json_value()` for every prop — it handles nested S7 objects, # nested lists containing S7 objects, and primitive types uniformly. # That matters when a prop's *declared* type is S7 but the actual value # is a primitive (e.g. `varimp` is sometimes a plain numeric vector # rather than a VariableImportance — a rtemis-internal type mismatch # that this method must tolerate). out <- list( .class = S7_class(x)@name, algorithm = x@algorithm, type = x@type, question = x@question, xnames = x@xnames, n_features = length(x@xnames), preprocessor = .to_json_value(x@preprocessor), preprocessor_internal = .to_json_value(x@preprocessor_internal), hyperparameters = .to_json_value(x@hyperparameters), tuner = .to_json_value(x@tuner), execution_config = .to_json_value(x@execution_config), metrics_training = .to_json_value(x@metrics_training), metrics_validation = .to_json_value(x@metrics_validation), metrics_test = .to_json_value(x@metrics_test), varimp = .to_json_value(x@varimp) ) # Subclass-specific extras if (prop_exists(x, "binclasspos")) { out[["binclasspos"]] <- x@binclasspos } if (prop_exists(x, "se_training")) { # Regression: don't serialize full SE vectors (large); flag presence only out[["has_se"]] <- !is.null(x@se_training) } if (prop_exists(x, "calibration_model")) { out[["calibration_model"]] <- .to_json_value(x@calibration_model) if (prop_exists(x, "metrics_training_calibrated")) { out[["metrics_training_calibrated"]] <- .to_json_value(x@metrics_training_calibrated) } if (prop_exists(x, "metrics_validation_calibrated")) { out[["metrics_validation_calibrated"]] <- .to_json_value(x@metrics_validation_calibrated) } if (prop_exists(x, "metrics_test_calibrated")) { out[["metrics_test_calibrated"]] <- .to_json_value(x@metrics_test_calibrated) } } Filter(Negate(is.null), out) } # /rtemis::to_json.Supervised # %% print.Supervised ---- method(print, Supervised) <- function( x, output_type = c("ansi", "html", "plain"), ... ) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.Supervised # %% describe.Supervised ---- #' Describe `Supervised` object #' #' @param x `Supervised` object. #' @param ... Not used. #' #' @return Character string describing the `Supervised` object, invisibly. #' #' @author EDG #' @noRd #' #' @examples #' species_lightrf <- train(iris, algorithm = "lightrf") #' describe(species_lightrf) method(describe, Supervised) <- function(x) { type <- x@type algorithm <- desc_alg(x@algorithm) cat(algorithm, " was used for ", tolower(type), ".\n", sep = "") desc <- paste0(algorithm, " was used for ", tolower(type), ".") # Tuning ---- if (length(x@tuner) > 0) { describe(x@tuner) } # Metrics ---- if (type == "Classification") { cat( "Balanced accuracy was", ddSci(x@metrics_training[["Overall"]][["Balanced_Accuracy"]]), "on the training set" ) desc <- paste( desc, "Balanced accuracy was", ddSci(x@metrics_training[["Overall"]][["Balanced_Accuracy"]]), "in the training set" ) if (!is.null(x@metrics_test[["Overall"]][["Balanced_Accuracy"]])) { cat( " and", ddSci(x@metrics_test[["Overall"]][["Balanced_Accuracy"]]), "in the test set." ) desc <- paste( desc, "and", ddSci(x@metrics_test[["Overall"]][["Balanced_Accuracy"]]), "in the test set." ) } else { cat(".") desc <- paste0(desc, ".") } } else if (type == "Regression") { cat( "R-squared was", ddSci(x@metrics_training[["Rsq"]]), "in the training set" ) desc <- paste( desc, "R-squared was", ddSci(x@metrics_training[["Rsq"]]), "on the training set" ) if (!is.null(x@metrics_test[["Rsq"]])) { cat( " and", ddSci(x@metrics_test[["Rsq"]]), "in the test." ) desc <- paste( desc, "and", ddSci(x@metrics_test[["Rsq"]]), "on the test set." ) } else { cat(".") desc <- paste0(desc, ".") } } cat("\n") invisible(desc) } # /rtemis::describe.Supervised # %% Classification ---- #' @title Classification #' #' @description #' Supervised subclass for classification models. #' #' @author EDG #' @noRd Classification <- new_class( name = "Classification", parent = Supervised, properties = list( predicted_prob_training = class_double | class_data.frame | NULL, predicted_prob_validation = class_double | class_data.frame | NULL, predicted_prob_test = class_double | class_data.frame | NULL, binclasspos = class_integer ), constructor = function( algorithm = NULL, model = NULL, preprocessor = NULL, # Preprocessor preprocessor_internal = NULL, # Algorithm-specific preprocessor hyperparameters = NULL, # Hyperparameters tuner = NULL, # Tuner execution_config, y_training = NULL, y_validation = NULL, y_test = NULL, predicted_training = NULL, predicted_validation = NULL, predicted_test = NULL, xnames = NULL, varimp = NULL, question = NULL, extra = NULL, predicted_prob_training = NULL, predicted_prob_validation = NULL, predicted_prob_test = NULL, binclasspos = 2L ) { metrics_training <- classification_metrics( true_labels = y_training, predicted_labels = predicted_training, predicted_prob = predicted_prob_training, sample = "Training" ) metrics_validation <- if (!is.null(y_validation)) { classification_metrics( true_labels = y_validation, predicted_labels = predicted_validation, predicted_prob = predicted_prob_validation, sample = "Validation" ) } else { NULL } metrics_test <- if (!is.null(y_test)) { classification_metrics( true_labels = y_test, predicted_labels = predicted_test, predicted_prob = predicted_prob_test, sample = "Test" ) } else { NULL } new_object( Supervised( algorithm = algorithm, model = model, type = "Classification", preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = y_training, y_validation = y_validation, y_test = y_test, predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, metrics_training = metrics_training, metrics_validation = metrics_validation, metrics_test = metrics_test, xnames = xnames, varimp = varimp, question = question, extra = extra ), predicted_prob_training = predicted_prob_training, predicted_prob_validation = predicted_prob_validation, predicted_prob_test = predicted_prob_test, binclasspos = binclasspos ) } ) # /rtemis::Classification # %% CalibratedClassification ---- #' @title CalibratedClassification #' #' @description #' Classification subclass for calibrated classification models. #' The classification_model can be trained on any data, ideally different from any data used by #' the classification model. #' #' @author EDG #' @noRd CalibratedClassification <- new_class( name = "CalibratedClassification", parent = Classification, properties = list( calibration_model = Supervised, predicted_training_calibrated = class_vector, predicted_validation_calibrated = class_vector | NULL, predicted_test_calibrated = class_vector | NULL, predicted_prob_training_calibrated = class_double, predicted_prob_validation_calibrated = class_double | NULL, predicted_prob_test_calibrated = class_double | NULL, metrics_training_calibrated = Metrics, metrics_validation_calibrated = Metrics | NULL, metrics_test_calibrated = Metrics | NULL ), constructor = function(classification_model, calibration_model) { # Predict calibrated probabilities of classification model datasets predicted_prob_training_calibrated <- predict( calibration_model, data.frame( predicted_probabilities = classification_model@predicted_prob_training ), ) predicted_prob_validation_calibrated <- if ( !is.null(classification_model@predicted_prob_validation) ) { predict( calibration_model, data.frame( predicted_probabilities = classification_model@predicted_prob_validation ) ) } else { NULL } predicted_prob_test_calibrated <- if ( !is.null(classification_model@predicted_prob_test) ) { predict( calibration_model, data.frame( predicted_probabilities = classification_model@predicted_prob_test ) ) } else { NULL } # Predict calibrated labels of classification model datasets predicted_training_calibrated <- prob2categorical( predicted_prob_training_calibrated, levels = levels(classification_model@y_training) ) predicted_validation_calibrated <- if ( !is.null(classification_model@predicted_prob_validation) ) { prob2categorical( predicted_prob_validation_calibrated, levels = levels(classification_model@y_validation) ) } else { NULL } predicted_test_calibrated <- if ( !is.null(classification_model@predicted_prob_test) ) { prob2categorical( predicted_prob_test_calibrated, levels = levels(classification_model@y_test) ) } else { NULL } metrics_training_calibrated <- classification_metrics( true_labels = classification_model@y_training, predicted_labels = predicted_training_calibrated, predicted_prob = predicted_prob_training_calibrated, sample = "Calibrated Training" ) metrics_validation_calibrated <- if ( !is.null(classification_model@y_validation) ) { classification_metrics( true_labels = classification_model@y_validation, predicted_labels = predicted_validation_calibrated, predicted_prob = predicted_prob_validation_calibrated, sample = "Calibrated Validation" ) } else { NULL } metrics_test_calibrated <- if (!is.null(classification_model@y_test)) { classification_metrics( true_labels = classification_model@y_test, predicted_labels = predicted_test_calibrated, predicted_prob = predicted_prob_test_calibrated, sample = "Calibrated Test" ) } else { NULL } new_object( classification_model, calibration_model = calibration_model, predicted_training_calibrated = predicted_training_calibrated, predicted_validation_calibrated = predicted_validation_calibrated, predicted_test_calibrated = predicted_test_calibrated, predicted_prob_training_calibrated = predicted_prob_training_calibrated, predicted_prob_validation_calibrated = predicted_prob_validation_calibrated, predicted_prob_test_calibrated = predicted_prob_test_calibrated, metrics_training_calibrated = metrics_training_calibrated, metrics_validation_calibrated = metrics_validation_calibrated, metrics_test_calibrated = metrics_test_calibrated ) } ) # /rtemis::CalibratedClassification # %% predict.CalibratedClassification ---- method(predict, CalibratedClassification) <- function(object, newdata, ...) { check_inherits(newdata, "data.frame") # Get the classification model's predicted probabilities raw_prob <- do_call( predict_super, list(model = object@model, newdata = newdata, type = "Classification") ) # Get the calibration model's predicted probabilities predict( object@calibration_model, newdata = data.frame(predicted_probabilities = raw_prob) ) } # /rtemis::predict.CalibratedClassification se_compat_algorithms <- c("GLM", "GAM") # %% Regression ---- #' @title Regression #' @description #' Supervised subclass for regression models. #' #' @author EDG #' @noRd Regression <- new_class( name = "Regression", parent = Supervised, properties = list( se_training = class_double | NULL, se_validation = class_double | NULL, se_test = class_double | NULL ), constructor = function( algorithm = NULL, model = NULL, preprocessor = NULL, # Preprocessor preprocessor_internal = NULL, # Algorithm-specific preprocessor hyperparameters = NULL, # Hyperparameters tuner = NULL, # Tuner execution_config, # ExecutionConfig y_training = NULL, y_validation = NULL, y_test = NULL, predicted_training = NULL, predicted_validation = NULL, predicted_test = NULL, se_training = NULL, se_validation = NULL, se_test = NULL, xnames = NULL, varimp = NULL, question = NULL, extra = NULL ) { # Metrics ---- metrics_training <- regression_metrics( y_training, predicted_training, sample = "Training" ) metrics_validation <- if (!is.null(y_validation)) { regression_metrics( y_validation, predicted_validation, sample = "Validation" ) } else { NULL } metrics_test <- if (!is.null(y_test)) { regression_metrics( y_test, predicted_test, sample = "Test" ) } else { NULL } new_object( Supervised( algorithm = algorithm, model = model, type = "Regression", preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = y_training, y_validation = y_validation, y_test = y_test, predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, metrics_training = metrics_training, metrics_validation = metrics_validation, metrics_test = metrics_test, xnames = xnames, varimp = varimp, question = question, extra = extra ), se_training = se_training, se_validation = se_validation, se_test = se_test ) } ) # /rtemis::Regression # %% plot_true_pred.Regression ---- #' Plot True vs. Predicted for Regression #' #' @param x `Regression` object. #' @param what Character vector: What to plot. Can include "training", "validation", "test", or #' "all", which will plot all available. #' @param fit Character: Algorithm to use to draw fit line. #' @param theme `Theme` object. #' @param labelify Logical: If TRUE, labelify the axis labels. #' @param ... Additional arguments passed to the plotting function. #' #' @author EDG #' @keywords internal #' @noRd method(plot_true_pred, Regression) <- function( x, what = "all", fit = "glm", theme = choose_theme(getOption("rtemis_theme")), labelify = TRUE, ... ) { if (length(what) == 1 && what == "all") { what <- c("training", "validation", "test") } true <- paste0("y_", what) true_l <- Filter( Negate(is.null), sapply(true, function(z) prop(x, z)) ) predicted <- paste0("predicted_", what) predicted_l <- Filter( Negate(is.null), sapply(predicted, function(z) prop(x, z)) ) if (labelify) { names(predicted_l) <- labelify(names(predicted_l)) } draw_fit( x = true_l, y = predicted_l, fit = fit, theme = theme, ... ) } # /rtemis::plot_true_pred.Regression # %% plot_true_pred.Classification ---- #' Plot True vs. Predicted for Classification #' #' @param x `Classification` object. #' @param what Character vector: What to plot. "training", "validation", "test" #' @param xlab Optional Character: x axis label. If NULL, will be generated automatically. #' @param theme `Theme` object. #' @param ... Additional arguments passed to the plotting function. #' #' @author EDG #' @keywords internal #' @noRd method(plot_true_pred, Classification) <- function( x, what = NULL, xlab = NULL, theme = choose_theme(getOption("rtemis_theme")), ... ) { if (is.null(what)) { if (!is.null(x@metrics_test)) { what <- "test" } else if (!is.null(x@metrics_validation)) { what <- "validation" } else { what <- "training" } } .confmat <- if (what == "training") { x@metrics_training } else if (what == "validation") { x@metrics_validation } else if (what == "test") { x@metrics_test } if (is.null(xlab)) { xlab <- labelify(paste("Predicted", what)) } draw_confusion( .confmat, theme = theme, xlab = xlab, ... ) } # /rtemis::plot_true_pred.Classification # %% plot_roc.Classification ---- method(plot_roc, Classification) <- function( x, what = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), filename = NULL, ... ) { if (is.null(x@predicted_prob_training)) { msg(highlight2("No predicted probabilities available.")) return(invisible()) } if (is.null(what)) { what <- if (!is.null(x@metrics_test)) { c("training", "test") } else { "training" } } labelsl <- probl <- list() if ("training" %in% what) { labelsl[["Training"]] <- x@y_training probl[["Training"]] <- x@predicted_prob_training } if ("test" %in% what && !is.null(x@predicted_prob_test)) { labelsl[["Test"]] <- x@y_test probl[["Test"]] <- x@predicted_prob_test } draw_roc( true_labels = labelsl, predicted_prob = probl, theme = theme, palette = palette, legend_title = "Sample (AUC)", filename = filename, ... ) } # /rtemis::plot_ROC.Classification # %% make_Supervised ---- make_Supervised <- function( algorithm = NULL, model = NULL, preprocessor = NULL, preprocessor_internal = NULL, hyperparameters = NULL, tuner = NULL, execution_config, y_training = NULL, y_validation = NULL, y_test = NULL, predicted_training = NULL, predicted_validation = NULL, predicted_test = NULL, predicted_prob_training = NULL, predicted_prob_validation = NULL, predicted_prob_test = NULL, se_training = NULL, se_validation = NULL, se_test = NULL, xnames = character(), varimp = NULL, question = character(), extra = NULL, binclasspos = 2L ) { # Supervised ---- if (is.factor(y_training)) { Classification( algorithm = algorithm, model = model, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = y_training, y_validation = y_validation, y_test = y_test, predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, predicted_prob_training = predicted_prob_training, predicted_prob_validation = predicted_prob_validation, predicted_prob_test = predicted_prob_test, xnames = xnames, varimp = varimp, question = question, extra = extra, binclasspos = binclasspos ) } else { Regression( algorithm = algorithm, model = model, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = y_training, y_validation = y_validation, y_test = y_test, predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, se_training = se_training, se_validation = se_validation, se_test = se_test, xnames = xnames, varimp = varimp, question = question, extra = extra ) } } # /rtemis::make_Supervised # %% write_Supervised ---- write_Supervised <- function( object, outdir = NULL, save_mod = FALSE, theme = choose_theme(getOption("rtemis_theme")), verbosity = 1L ) { if (verbosity > 0L) { print(object) } if (save_mod) { rt_save(object, outdir, verbosity = verbosity) } } # /rtemis::write_Supervised # %% present.Regression ---- # present method for Regression objects # Plot training + test metrics, if available, side by side using `plotly::subplot()` # & run `describe()` on the object method(present, Regression) <- function( x, what = c("training", "test"), theme = choose_theme(getOption("rtemis_theme")), filename = NULL, ... ) { # Describe the model describe(x) # Plot True vs. Predicted plot_true_pred( x, what = what, theme = theme, filename = filename, ... ) } # /rtemis::present.Regression # %% present.Classification ---- # present method for Classification objects # Plot training + test metrics if available, side by side method(present, Classification) <- function( x, what = c("training", "test"), type = c("ROC", "confusion"), theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), filename = NULL, ... ) { type <- match.arg(type) # Describe the model describe(x) if (type == "ROC") { plot_roc( x, what = what, theme = theme, palette = palette, filename = filename ) } else if (type == "confusion") { # Training set plot if ("training" %in% what) { plot_training <- plot_true_pred( x, what = "training", theme = theme, xlab = "Predicted Training" ) } else { plot_training <- NULL } # Test set plot if ("test" %in% what && !is.null(x@y_test)) { plot_test <- plot_true_pred( x, what = "test", theme = theme, xlab = "Predicted Test" ) } else { plot_test <- NULL } # Combined plot # classification: confusion matrices side by side plotly::subplot( plot_training, plot_test, nrows = 1L, shareX = FALSE, shareY = FALSE, titleX = TRUE, titleY = TRUE, margin = 0.01 ) } } # /rtemis::present.Classification # %% SupervisedRes ---- # fields metrics_training/metrics_validation/metrics_test # could be active bindings that are collected from @models #' SupervisedRes #' #' @description #' Superclass for Resampled supervised learning models. #' #' @author EDG #' @noRd SupervisedRes <- new_class( name = "SupervisedRes", properties = list( algorithm = class_character, models = class_list, type = class_character, preprocessor = Preprocessor | NULL, preprocessor_internal = Preprocessor | NULL, hyperparameters = Hyperparameters | NULL, tuner_config = TunerConfig | NULL, outer_resampler = Resampler, execution_config = ExecutionConfig, y_training = class_any, y_test = class_any, predicted_training = class_any, predicted_test = class_any, metrics_training = MetricsRes, metrics_test = MetricsRes, xnames = class_character, varimp = class_list | NULL, question = class_character | NULL, extra = class_any, session_info = class_any ), constructor = function( algorithm, models, type, preprocessor, preprocessor_internal, hyperparameters, tuner_config, outer_resampler, execution_config, y_training, y_test, predicted_training, predicted_test, metrics_training, metrics_test, metrics_training_mean, metrics_test_mean, xnames, varimp, question, extra ) { new_object( S7::S7_object(), algorithm = algorithm, models = models, type = models[[1]]@type, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, metrics_training = metrics_training, metrics_test = metrics_test, # metrics_training_mean = metrics_training_mean, # metrics_test_mean = metrics_test_mean, xnames = xnames, varimp = varimp, question = question, extra = extra, session_info = sessionInfo() ) } ) # /rtemis::SupervisedRes # %% repr.SupervisedRes ---- #' repr `SupervisedRes` #' #' @param x `SupervisedRes` object. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' @param filename Character: Filename to save output to (not used). #' #' @author EDG #' #' @keywords internal #' @noRd method(repr, SupervisedRes) <- function( x, output_type = NULL, filename = NULL ) { output_type <- get_output_type(output_type, filename) # Class name + Alg name (2 lines) out <- paste0( repr_S7name(paste("Resampled", x@type, "Model"), output_type = output_type), highlight(x@algorithm, output_type = output_type), " (", desc_alg(x@algorithm), ")\n" ) # Tuning, if available (1 line) if (!is.null(x@tuner_config)) { out <- paste0( out, fmt("\U2699", col = col_tuner, bold = TRUE, output_type = output_type), " Tuned using ", desc(x@tuner_config), ".\n" ) } # Outer resampling (1 line) out <- paste0( out, fmt("\U27F3", col = col_outer, bold = TRUE, output_type = output_type), " Tested using ", desc(x@outer_resampler), ".\n" ) # Calibration, if available if (prop_exists(x, "calibration_models")) { out <- paste0( out, fmt( "\U27CB", col = col_calibrator, bold = TRUE, output_type = output_type ), " Calibrated using ", desc_alg(x@calibration_models[[1]]@algorithm), " with ", desc(x@calibration_models[[1]]@outer_resampler@config), ".\n" ) } out <- paste0(out, "\n") # Metrics, training if (prop_exists(x, "calibration_models")) { out <- paste0( out, repr_CalibratedClassificationResMetrics( x@metrics_training, x@metrics_training_calibrated, pad = 2L, output_type = output_type ) ) } else { out <- paste0( out, repr(x@metrics_training, pad = 2L, output_type = output_type) ) } # Metrics, test if (prop_exists(x, "calibration_models")) { out <- paste0( out, "\n", repr_CalibratedClassificationResMetrics( x@metrics_test, x@metrics_test_calibrated, pad = 2L, output_type = output_type ) ) } else { out <- paste0( out, "\n", repr(x@metrics_test, pad = 2L, output_type = output_type) ) } out } # /rtemis::repr.SupervisedRes # %% to_json.SupervisedRes ---- #' to_json `SupervisedRes` #' #' Convert a `SupervisedRes` (or `RegressionRes` / `ClassificationRes`) #' object to a JSON-serializable list. The list of per-resample fitted #' models (`@models`) is summarised by length only — individual model #' details remain available on the server and can be fetched via #' separate `job.result` requests if needed. #' #' @param x `SupervisedRes` object. #' #' @return Named list. Pass to `jsonlite::toJSON(auto_unbox = TRUE)`. #' #' @author EDG #' @keywords internal #' @noRd method(to_json, SupervisedRes) <- function(x, ...) { out <- list( .class = S7_class(x)@name, algorithm = x@algorithm, type = x@type, question = x@question, xnames = x@xnames, n_features = length(x@xnames), n_resamples = length(x@models), preprocessor = .to_json_value(x@preprocessor), preprocessor_internal = .to_json_value(x@preprocessor_internal), hyperparameters = .to_json_value(x@hyperparameters), tuner_config = .to_json_value(x@tuner_config), outer_resampler = .to_json_value(x@outer_resampler), execution_config = .to_json_value(x@execution_config), metrics_training = .to_json_value(x@metrics_training), metrics_test = .to_json_value(x@metrics_test), # varimp is `class_list | NULL` of VariableImportance. # `.to_json_value` recurses through lists, dispatching `to_json` on # S7 elements and passing through anything else. varimp_per_resample = .to_json_value(x@varimp) ) Filter(Negate(is.null), out) } # /rtemis::to_json.SupervisedRes # %% print.SupervisedRes ---- method(print, SupervisedRes) <- function( x, output_type = NULL, ... ) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.SupervisedRes # %% predict.SupervisedRes ---- #' Predict SupervisedRes #' #' Predict Method for SupervisedRes objects #' #' @param object `SupervisedRes` object. #' @param newdata data.frame or similar: New data to predict. #' @param type Character: Type of prediction to output: "avg" applies `avg_fn` (default "mean") to #' the predictions of individual models, "all" returns the predictions of all models in a #' data.frame. "metrics" returns a list of data.frames with a) predictions from each model, b) #' the mean of the predictions, and c) the standard deviation of the predictions. #' @param ... Not used. #' #' @keywords internal #' @noRd method(predict, SupervisedRes) <- function( object, newdata, type = c("avg", "all", "metrics"), avg_fn = "mean", ... ) { check_inherits(newdata, "data.frame") type <- match.arg(type) predicted <- sapply( object@models, function(mod) { predict(mod, newdata = newdata) } ) # -> data.frame n cases x n resamples if (type == "all") { return(predicted) } else if (type == "avg") { return(apply(predicted, 1, avg_fn)) } else if (type == "metrics") { mean_predictions <- apply(predicted, 2, mean) sd_predictions <- apply(predicted, 2, sd) return(list( predictions = predicted, mean = mean_predictions, sd = sd_predictions )) } } # /rtemis::predict.SupervisedRes # %% ClassificationRes ---- #' @title ClassificationRes #' #' @description #' SupervisedRes subclass for Resampled classification models. #' #' @author EDG #' @noRd ClassificationRes <- new_class( name = "ClassificationRes", parent = SupervisedRes, properties = list( predicted_prob_training = class_any, predicted_prob_test = class_any ), constructor = function( algorithm, models, preprocessor, preprocessor_internal = NULL, hyperparameters, tuner_config, outer_resampler, execution_config, y_training, y_validation = NULL, y_test = NULL, predicted_training = NULL, predicted_test = NULL, predicted_prob_training = NULL, predicted_prob_test = NULL, xnames = NULL, varimp = NULL, question = NULL, extra = NULL ) { metrics_training <- ClassificationMetricsRes( sample = "Training", res_metrics = lapply(models, function(mod) mod@metrics_training) ) metrics_test <- ClassificationMetricsRes( sample = "Test", res_metrics = lapply(models, function(mod) mod@metrics_test) ) new_object( SupervisedRes( algorithm = algorithm, models = models, type = "Classification", preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, metrics_training = metrics_training, metrics_test = metrics_test, # metrics_training_mean = metrics_training_mean, # metrics_test_mean = metrics_test_mean, xnames = xnames, varimp = varimp, question = question, extra = extra ), predicted_prob_training = predicted_prob_training, predicted_prob_test = predicted_prob_test ) } ) # /rtemis::ClassificationRes # %% CalibratedClassificationRes ---- #' @title CalibratedClassificationRes #' #' @description #' ClassificationRes subclass for calibrated classification models. #' The calibration models are trained on resamples of the `ClassificationRes`'s test data. #' #' @author EDG #' @noRd # We use getter functions to avoid duplicating data CalibratedClassificationRes <- new_class( name = "CalibratedClassificationRes", parent = ClassificationRes, properties = list( calibration_models = class_list, predicted_training_calibrated = new_property( getter = function(self) { lapply(self@calibration_models, function(mod) { mod@predicted_training }) } ), predicted_test_calibrated = new_property( getter = function(self) { lapply(self@calibration_models, function(mod) { mod@predicted_test }) } ), predicted_prob_training_calibrated = new_property( getter = function(self) { lapply(self@calibration_models, function(mod) { mod@predicted_prob_training }) } ), predicted_prob_test_calibrated = new_property( getter = function(self) { lapply(self@calibration_models, function(mod) { mod@predicted_prob_test }) } ), metrics_training_calibrated = ClassificationMetricsRes, metrics_test_calibrated = ClassificationMetricsRes ), constructor = function(ClassificationRes_model, calibrations_models) { # Aggregate calibrated metrics from individual models within each calibration resample # calibrations_models is a list of *Res objects, each containing multiple models # We need to extract all individual model metrics and aggregate them all_training_metrics <- unlist( lapply(calibrations_models, function(calmod) { calmod@metrics_training@res_metrics }), recursive = FALSE ) all_test_metrics <- unlist( lapply(calibrations_models, function(calmod) { calmod@metrics_test@res_metrics }), recursive = FALSE ) metrics_training_calibrated <- ClassificationMetricsRes( sample = "Training", res_metrics = all_training_metrics ) metrics_test_calibrated <- ClassificationMetricsRes( sample = "Test", res_metrics = all_test_metrics ) new_object( ClassificationRes_model, calibration_models = calibrations_models, metrics_training_calibrated = metrics_training_calibrated, metrics_test_calibrated = metrics_test_calibrated ) } ) # /rtemis::CalibratedClassificationRes # %% predict.CalibratedClassificationRes ---- method(predict, CalibratedClassificationRes) <- function( object, newdata, what = c("avg", "all", "metrics"), avg_fn = "mean", ... ) { check_inherits(newdata, "data.frame") what <- match.arg(what) # Check lengths match if (length(object@models) != length(object@calibration_models)) { cli::cli_abort("Number of base models and calibration models must match.") } predicted <- mapply( function(base_mod, cal_mod) { # 1. Predict with base model raw_prob <- predict( base_mod, newdata = newdata ) # 2. Predict with calibration model predict( cal_mod, newdata = data.frame(predicted_probabilities = raw_prob) ) }, object@models, object@calibration_models, SIMPLIFY = TRUE ) # -> matrix n cases x n resamples if (what == "all") { return(predicted) } else if (what == "avg") { return(apply(predicted, 1, avg_fn)) } else if (what == "metrics") { mean_predictions <- apply(predicted, 2, mean) sd_predictions <- apply(predicted, 2, sd) # Return both aggregated prediction metrics (per resample) # Keeping consistent with SupervisedRes return(list( predictions = predicted, mean = mean_predictions, sd = sd_predictions )) } } # /rtemis::predict.CalibratedClassificationRes # %% RegressionRes ---- #' @title RegressionRes #' #' @description #' SupervisedRes subclass for Resampled regression models. #' #' @author EDG #' @noRd RegressionRes <- new_class( name = "RegressionRes", parent = SupervisedRes, properties = list( se_training = class_any, se_validation = class_any, se_test = class_any ), constructor = function( algorithm, models, preprocessor, preprocessor_internal, hyperparameters, tuner_config, outer_resampler, execution_config, y_training, y_validation = NULL, y_test = NULL, predicted_training = NULL, predicted_test = NULL, se_training = NULL, se_test = NULL, xnames = NULL, varimp = NULL, question = NULL, extra = NULL ) { metrics_training <- lapply( models, function(mod) mod@metrics_training@metrics ) metrics_test <- lapply(models, function(mod) mod@metrics_test@metrics) metrics_training <- RegressionMetricsRes( sample = "Training", res_metrics = lapply(models, function(mod) mod@metrics_training) ) metrics_test <- RegressionMetricsRes( sample = "Test", res_metrics = lapply(models, function(mod) mod@metrics_test) ) new_object( SupervisedRes( algorithm = algorithm, models = models, type = "Regression", preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, metrics_training = metrics_training, metrics_test = metrics_test, # metrics_training_mean = metrics_training_mean, # metrics_test_mean = metrics_test_mean, xnames = xnames, varimp = varimp, question = question, extra = extra ), se_training = se_training, se_test = se_test ) } ) # /rtemis::RegressionRes # %% desc.SupervisedRes ---- method(desc, SupervisedRes) <- function(x, metric = NULL) { type <- x@type algorithm <- desc_alg(x@algorithm) # cat(algorithm, " was used for ", tolower(type), ".\n", sep = "") out <- paste0(algorithm, " was used for ", tolower(type), ".") # Tuning ---- if (length(x@tuner_config) > 0) { out <- paste0( out, " Hyperparameter tuning was performed using ", desc(x@tuner_config), "." ) } # Metrics ---- if (type == "Classification") { if (is.null(metric)) { metric <- "Balanced_Accuracy" } out <- paste( out, "Mean", labelify(metric, toLower = TRUE), "was", ddSci(x@metrics_training@mean_metrics[["Balanced_Accuracy"]]), "in the training set and", ddSci(x@metrics_test@mean_metrics[["Balanced_Accuracy"]]), "in the test set across " ) } else if (type == "Regression") { out <- paste( out, "Mean R-squared was", ddSci(x@metrics_training@mean_metrics[["Rsq"]]), "on the training set and", ddSci(x@metrics_test@mean_metrics[["Rsq"]]), "on the test set across " ) } out <- paste0(out, desc(x@outer_resampler), ".") invisible(out) } # /rtemis::desc.SupervisedRes # %% describe.SupervisedRes ---- #' Describe `SupervisedRes` #' #' @param x `SupervisedRes` object. #' @param ... Not used. #' #' @return Character string describing the `SupervisedRes` object, invisibly. #' #' @author EDG #' @noRd #' #' @examples #' mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler()) #' describe(mod) method(describe, SupervisedRes) <- function(x, ...) { cat(desc(x), "\n") } # %% present.SupervisedRes ---- method(present, SupervisedRes) <- function( x, theme = choose_theme(getOption("rtemis_theme")), ... ) { # Describe the model describe(x) # Plot the performance metrics plot_metric(x, what = c("training", "test"), theme = theme, ...) } # /rtemis::present.SupervisedRes # %% plot_true_pred.RegressionRes ---- # Plot true vs. predicted aggregated across resamples for either training, test, or both. #' Plot True vs. Predicted for RegressionRes #' #' @param x `RegressionRes` object. #' @param what Character vector: "all", "training", "test". Which set(s) to plot. #' @param fit Character: Algorithm to use to draw fit line. #' @param theme `Theme` object. #' @param labelify Logical: If TRUE, labelify the axis labels. #' @param ... Additional arguments passed to [draw_fit]. #' #' @author EDG #' @keywords internal #' @noRd method(plot_true_pred, RegressionRes) <- function( x, what = "all", fit = "glm", theme = choose_theme(getOption("rtemis_theme")), labelify = TRUE, ... ) { if (length(what) == 1 && what == "all") { what <- c("training", "test") } true <- paste0("y_", what) true_l <- sapply(true, function(z) { unlist(prop(x, z), use.names = FALSE) }) predicted <- paste0("predicted_", what) predicted_l <- sapply(predicted, function(z) { unlist(prop(x, z), use.names = FALSE) }) if (labelify) { names(predicted_l) <- labelify(names(predicted_l)) } draw_fit( x = true_l, y = predicted_l, fit = fit, theme = theme, ... ) } # /rtemis::plot_true_pred.RegressionRes # %% plot_true_pred.ClassificationRes ---- # Cannot be combined with plot_true_pred.RegressionRes # because scatter can overplot train & test, but confusion matrices must be subplots. #' Plot True vs. Predicted for ClassificationRes #' #' @param x `ClassificationRes` object. #' @param what Character vector: "all", "training", "test". Which set(s) to plot. #' @param theme `Theme` object. #' @param ... Additional arguments passed to [draw_confusion]. #' #' @author EDG #' @keywords internal #' @noRd method(plot_true_pred, ClassificationRes) <- function( x, what = "all", theme = choose_theme(getOption("rtemis_theme")), ... ) { if (length(what) == 1 && what == "all") { what <- c("training", "test") } true <- paste0("y_", what) true_l <- sapply(true, function(z) { unlist(prop(x, z), use.names = FALSE) }) predicted <- paste0("predicted_", what) predicted_l <- sapply(predicted, function(z) { unlist(prop(x, z), use.names = FALSE) }) # if (labelify) { # names(predicted_l) <- labelify(names(predicted_l)) # } # => Do not pass filename to both training & testing, latter will overwrite; pass to subplot if # plotting both # Training if ("training" %in% what) { plt_training <- draw_confusion( table(true_l[["y_training"]], predicted_l[["predicted_training"]]), xlab = "Predicted Training", theme = theme, ... ) } if ("test" %in% what) { plt_test <- draw_confusion( table(true_l[["y_test"]], predicted_l[["predicted_test"]]), xlab = "Predicted Test", theme = theme, ... ) } if (length(what) == 1) { if (what == "training") { return(plt_training) } else { return(plt_test) } } else { return(plotly::subplot( plt_training, plt_test, nrows = 1L, shareX = FALSE, shareY = FALSE )) } } # /rtemis::plot_true_pred.ClassificationRes # %% plot_roc.ClassificationRes ---- #' Plot ROC for ClassificationRes #' #' @param x `ClassificationRes` object. #' @param what Character vector: "all", "training", "test". Which set(s) to plot. #' @param theme `Theme` object. #' @param col Character vector: Colors to use for the ROC curves. #' @param filename Character: Filename to save the plot to. #' @param ... Additional arguments passed to [draw_roc]. #' #' @return plotly object. #' #' @author EDG #' @keywords internal #' @noRd method(plot_roc, ClassificationRes) <- function( x, what = "all", theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), filename = NULL, ... ) { if (length(what) == 1 && what == "all") { what <- c("training", "test") } labelsl <- probl <- list() if ("training" %in% what) { labelsl[["Training"]] <- unlist(x@y_training, use.names = FALSE) probl[["Training"]] <- unlist(x@predicted_prob_training, use.names = FALSE) } if ("test" %in% what && !is.null(x@predicted_prob_test)) { labelsl[["Test"]] <- unlist(x@y_test, use.names = FALSE) probl[["Test"]] <- unlist(x@predicted_prob_test, use.names = FALSE) } draw_roc( true_labels = labelsl, predicted_prob = probl, theme = theme, palette = palette, legend_title = "Sample (AUC)", filename = filename, ... ) } # /rtemis::plot_roc.ClassificationRes # %% plot_metric.SupervisedRes ---- #' Plot Metric SupervisedRes #' #' Plot boxplot of performance metrics across resamples. #' #' @param x `SupervisedRes` object. #' @param what Character vector: "training", "test". What to print. Default is to print both. #' @param metric Character: Metric to plot. #' @param ylab Character: Label for the y-axis. #' @param boxpoints Character:"all", "outliers" - How to display points in the boxplot. #' @param theme `Theme` object. #' @param ... Additional arguments passed to the plotting function. #' #' @author EDG #' @keywords internal #' @noRd method(plot_metric, SupervisedRes) <- function( x, what = c("training", "test"), metric = NULL, ylab = labelify(metric), boxpoints = "all", theme = choose_theme(getOption("rtemis_theme")), ... ) { what <- match.arg(what, several.ok = TRUE) .class <- x@type == "Classification" # Metric if (is.null(metric)) { if (.class) { metric <- "Balanced_Accuracy" } else { metric <- "Rsq" } } xl <- list() if ("training" %in% what) { if (.class) { xl[["Training"]] <- sapply( x@metrics_training@res_metrics, function(fold) { fold[["Overall"]][[metric]] } ) } else { xl[["Training"]] <- sapply( x@metrics_training@res_metrics, function(fold) { fold[[metric]] } ) } } if ("test" %in% what) { if (.class) { xl[["Test"]] <- sapply(x@metrics_test@res_metrics, function(fold) { fold[["Overall"]][[metric]] }) } else { xl[["Test"]] <- sapply(x@metrics_test@res_metrics, function(fold) { fold[[metric]] }) } } # Boxplot ---- draw_box(xl, theme = theme, ylab = ylab, boxpoints = boxpoints, ...) } # /rtemis::plot_metric.SupervisedRes # %% plot_varimp.Supervised ---- method(plot_varimp, Supervised) <- function( x, measure = NULL, theme = choose_theme(getOption("rtemis_theme")), filename = NULL, ... ) { if (is.null(x@varimp)) { msg(highlight2("No variable importance available.")) return(invisible()) } if (is.null(measure)) { vi <- x@varimp@data[[2L]] } else { vi <- x@varimp@data[[measure]] } names(vi) <- x@varimp@data[["variable"]] draw_varimp(vi, theme = theme, filename = filename, ...) } # /rtemis::plot_varimp.Supervised # %% plot_varimp.SupervisedRes ---- method(plot_varimp, SupervisedRes) <- function( x, measure = NULL, ylab = NULL, summarize_fn = "mean", show_top = 20L, theme = choose_theme(getOption("rtemis_theme")), filename = NULL, ... ) { if (is.null(x@varimp)) { msg(highlight2("No variable importance available.")) return(invisible()) } check_inherits(summarize_fn, "character") # Extract named numeric vectors from each VariableImportance object. # Not every variable gets a score in every resample, so rbindlist with fill. varimp_list <- lapply(x@varimp, function(z) { vi <- if (is.null(measure)) z@data[[2L]] else z@data[[measure]] names(vi) <- z@data[["variable"]] as.data.table(as.list(vi)) }) varimp <- rbindlist(varimp_list, use.names = TRUE, fill = TRUE) # Missing scores (variable absent in a resample) treated as 0 setDF(varimp) varimp[is.na(varimp)] <- 0 # Summarize and sort varimp_summary <- apply(varimp, 2, summarize_fn) varimp_sorted <- varimp_summary[order(-varimp_summary)] if (length(varimp_sorted) > show_top) { varimp_sorted <- varimp_sorted[seq_len(show_top)] } # ylab if (is.null(ylab)) { measure_name <- if (is.null(measure)) { names(x@varimp[[1L]]@data)[2L] } else { measure } ylab <- paste0( labelify(paste(summarize_fn, measure_name)), "\n(across ", desc(x@outer_resampler), ")" ) } draw_varimp( varimp_sorted, theme = theme, ylab = ylab, filename = filename, ... ) } # /rtemis::plot_varimp.SupervisedRes # %% make_SupervisedRes ---- #' Make SupervisedRes #' #' @author EDG #' @keywords internal #' @noRd make_SupervisedRes <- function( algorithm, type, models, preprocessor, preprocessor_internal, hyperparameters, tuner_config, outer_resampler, execution_config, y_training, y_test, predicted_training, predicted_test, predicted_prob_training, predicted_prob_test, se_training = NULL, se_test = NULL, xnames = character(), varimp = NULL, question = character(), extra = NULL ) { if (type == "Classification") { ClassificationRes( algorithm = algorithm, models = models, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, predicted_prob_training = predicted_prob_training, predicted_prob_test = predicted_prob_test, xnames = xnames, varimp = varimp, question = question, extra = extra ) } else { RegressionRes( algorithm = algorithm, models = models, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, se_training = se_training, se_test = se_test, xnames = xnames, varimp = varimp, question = question, extra = extra ) } } # /rtemis::make_SupervisedRes early_stopping_algs <- c("LightGBM", "LightRF", "LightRuleFit") # LightRuleFit ---- #' @title LightRuleFit #' #' @description #' Class for LightRuleFit models. #' #' @author EDG #' @noRd LightRuleFit <- new_class( name = "LightRuleFit", properties = list( model_lightgbm = Supervised, model_glmnet = Supervised, rules = class_character, rules_coefs = class_data.frame, rules_index = class_integer, rules_selected = class_character, rules_selected_formatted = class_character, rules_selected_formatted_coefs = class_data.frame, y_levels = class_character | NULL, xnames = class_character, complexity_metrics = class_data.frame ) ) # /rtemis::LightRuleFit # Print LightRuleFit ---- method(print, LightRuleFit) <- function(x, ...) { objcat("rtemis LightRuleFit Model") cat( "Trained using ", highlight(x@model_lightgbm@algorithm), " and ", highlight(x@model_glmnet@algorithm), ".\n", sep = "" ) cat("Selected", highlight(length(x@rules_selected)), "rules.\n") invisible(x) } # /rtemis::print.LightRuleFit # get_metric Regression ---- method(get_metric, Regression) <- function(x, set, metric) { prop(prop(x, paste0("metrics_", set)), "metrics")[[metric]] } # /get_metric.Regression # get_metric Classification ---- method(get_metric, Classification) <- function(x, set, metric) { prop(prop(x, paste0("metrics_", set)), "metrics")[["Overall"]][[metric]] } # /get_metric.Classification # get_metric RegressionRes ---- method(get_metric, RegressionRes) <- function(x, set, metric) { sapply( prop(prop(x, paste0("metrics_", set)), "res_metrics"), function(r) { r[[metric]] } ) } # /rtemis::get_metric.RegressionRes # get_metric ClassificationRes ---- method(get_metric, ClassificationRes) <- function(x, set, metric) { sapply( prop(prop(x, paste0("metrics_", set)), "res_metrics"), function(r) { r[["Overall"]][[metric]] } ) } # /rtemis::get_metric.ClassificationRes # Describe list of Supervised/Res ---- #' Describe multiple Supervised or SupervisedRes objects #' #' @param x List of `Supervised` or `SupervisedRes` objects. #' @param metric Character: Metric to use for description. Default is NULL, which uses "Balanced_Accuracy" for Classification and "Rsq" for Regression. #' @param decimal_places Integer: Number of decimal places to round metrics to. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @author EDG #' #' @keywords internal #' @noRd method(desc, class_list) <- function( x, metric = NULL, decimal_places = 3L, output_type = NULL ) { output_type <- get_output_type(output_type) # Check all elements are Supervised or all are SupervisedRes objects if ( !all(sapply(x, S7_inherits, Supervised)) && !all(sapply(x, S7_inherits, SupervisedRes)) ) { cli::cli_abort( "All elements must be either Supervised or SupervisedRes objects" ) } type <- if (S7_inherits(x[[1]], SupervisedRes)) { "SupervisedRes" } else { "Supervised" } # Check that all models are of the same type if (!all(sapply(x, function(m) m@type == x[[1]]@type))) { cli::cli_abort( "All objects must be of the same supervised learning type (Classification or Regression)." ) } # Name list using algorithm names if (is.null(names(x))) { names(x) <- sapply(x, function(m) m@algorithm) } suptype <- x[[1]]@type # SupervisedRes if (type == "SupervisedRes") { # Check that the same resampling method was used - ideally same seed, but do not enforce that, but report it # Get resampling config from each res_params <- lapply(x, function(m) m@outer_resampler@config) # Check all resamplers of same type if (!all(sapply(res_params, function(p) p@type == res_params[[1]]@type))) { cli::cli_warn( "All SupervisedRes objects must use the same resampling method." ) } # ?replace with loop that checks all resampler params # Check all resamplers use same n if (!all(sapply(res_params, function(p) p@n == res_params[[1]]@n))) { cli::cli_warn( "All SupervisedRes objects must use the same number of resamples." ) } # Describe SupervisedRes objects # 1. Report names of algorithms used. out <- paste0( oxfordcomma(sapply(x, function(m) desc_abb_alg(m@algorithm))), " were used for ", suptype, ".\n" ) # 2. Get metric if (is.null(metric)) { metric <- if (suptype == "Classification") { "Balanced_Accuracy" } else { "Rsq" } } metricv <- sapply(x, function(m) m@metrics_test@mean_metrics[[metric]]) } # /SupervisedRes if (type == "Supervised") { # 1. Report names of algorithms used. out <- paste0( oxfordcomma(sapply(x, function(m) desc_abb_alg(m@algorithm))), " were used for ", suptype, ".\n" ) # 2. Get metric if (is.null(metric)) { metric <- if (suptype == "Classification") { "Balanced_Accuracy" } else { "Rsq" } } if (suptype == "Classification") { # Classification metricv <- sapply(x, function(m) { m@metrics_test@metrics[["Overall"]][[metric]] }) } else { # Regression metricv <- sapply(x, function(m) m@metrics_test@metrics[[metric]]) } } # /rtemis::Supervised # 3. Report mean metric across all models, sorting by performance metric_sorted <- sort(metricv, decreasing = TRUE) # => Get ties at specified decimal_places out <- paste0( out, "The top-performing model was ", bold(names(metric_sorted)[1], output_type = output_type), " with a test-set ", bold(labelify(metric), output_type = output_type), " of ", bold( ddSci(metric_sorted[1], decimal_places = decimal_places), output_type = output_type ), ", followed by ", oxfordcomma(names(metric_sorted[-1])), " with ", metric, " of ", oxfordcomma(ddSci(metric_sorted[-1], decimal_places = decimal_places)), " respectively." ) out } # /rtemis::desc.list # %% describe.list(Supervised/Res) ---- #' Print description of a list of Supervised or SupervisedRes objects #' #' @param x List of `Supervised` or `SupervisedRes` objects. #' @param ... See details. #' #' @details #' Extra arguments: #' - `metric`: Character: Metric to use for description. If NULL, defaults to "Balanced_Accuracy" for Classification and "Rsq" for Regression. #' - `decimal_places`: Integer: Number of decimal places to round metrics to. #' - `output_type`: Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character of description invisibly. Prints description to output. #' #' @author EDG #' #' @keywords internal #' @noRd method(describe, class_list) <- function( x, metric = NULL, decimal_places = 3L, output_type = NULL, ... ) { out <- desc( x, metric = metric, decimal_places = decimal_places, output_type = output_type ) cat(out, "\n") invisible(out) } # /rtemis::describe.list(Supervised/Res) ================================================ FILE: R/08_MassUni.R ================================================ # S7_MassUni.R # ::rtemis:: # 2025 EDG rtemis.org # %% MassGLM ---- #' @title MassGLM #' #' @description #' Superclass for mass-univariate models. #' #' @author EDG #' @noRd MassGLM <- new_class( name = "MassGLM", properties = list( summary = class_data.table, ynames = class_character, xnames = class_character, coefnames = class_character, family = class_character ) ) # /rtemis::MassGLM # %% `$`.MassGLM ---- # Make MassGLM@name `$`-accessible ---- method(`$`, MassGLM) <- function(x, name) { prop(x, name) } # %% `.DollarNames`.MassGLM ---- # `$`-autocomplete MassGLM ---- method(`.DollarNames`, MassGLM) <- function(x, pattern = "") { prop_names <- names(props(x)) grep(pattern, prop_names, value = TRUE) } # %% `[[`.MassGLM ---- # Make MassGLM@name `[[`-accessible ---- method(`[[`, MassGLM) <- function(x, name) { prop(x, name) } # %% repr.MassGLM ---- method(repr, MassGLM) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name("MassGLM", pad = pad), highlight(length(x@ynames)), " GLMs of family ", bold(x@family), " with ", highlight(length(x@xnames)), ngettext(length(x@xnames), " predictor", " predictors"), " each.", "\nAvailable coefficients: ", paste(highlight(x@coefnames), collapse = ", "), "\n" ) } # /rtemis::repr.MassGLM # %% print.MassGLM ---- #' Print MassGLM #' #' @param x MassGLM object. #' @param ... Not used. #' #' @return `x`, invisibly. #' #' @author EDG #' @noRd method(print, MassGLM) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.MassGLM # %% plot.MassGLM ---- #' Plot MassGLM using volcano plot #' #' @param x MassGLM object trained using [massGLM]. #' @param coefname Character: Name of coefficient to plot. If `NULL`, the first coefficient is used. #' @param p_adjust_method Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" - #' p-value adjustment method. #' @param p_transform Function to transform p-values for plotting. Default is `function(x) -log10(x)`. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param theme `Theme` object. Create using one of the `theme_` functions, e.g. #' `theme_whitegrid()`. #' @param verbosity Integer: Verbosity level. #' @param ... Additional arguments passed to [draw_volcano]. #' #' @return plotly object with volcano plot. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' set.seed(2019) #' y <- rnormmat(500, 500, return_df = TRUE) #' x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500)) #' mod <- massGLM(x, y) #' plot(mod) plot.MassGLM <- method(plot, MassGLM) <- function( x, coefname = NULL, p_adjust_method = "holm", p_transform = function(x) -log10(x), xlab = "Coefficient", ylab = NULL, theme = choose_theme(getOption("rtemis_theme")), verbosity = 1L, ... ) { if (is.null(coefname)) { coefname <- x@coefnames[1] } if (!coefname %in% x@coefnames) { cli::cli_abort(c( "i" = "{.var coefname} must be one of available coefnames: {.strong {x@coefnames}}", "x" = "You asked for: {.strong {coefname}}" )) } if (verbosity > 0L) { msg( "Plotting coefficients for", highlight(coefname), "x", length(x@ynames), "outcomes." ) } # y-axis label ---- if (is.null(ylab)) { ylab <- fn2label(p_transform, "p-value") ylab <- paste(ylab, "for", coefname) if (p_adjust_method != "none") { ylab <- paste0(ylab, " (", labelify(p_adjust_method), "-corrected)") } } # Plot ---- coefs <- x@summary[[paste0("Coefficient_", coefname)]] pvals <- x@summary[[paste0("p_value_", coefname)]] draw_volcano( x = coefs, pvals = pvals, xnames = x@ynames, p_adjust_method = p_adjust_method, p_transform = p_transform, theme = theme, xlab = xlab, ylab = ylab, ... ) } # /rtemis::plot.MassGLM # %% plot_manhattan.MassGLM ---- #' @name #' plot_manhattan #' #' @param x MassGLM object. #' @param coefname Character: Name of coefficient to plot. If `NULL`, the first coefficient is used. #' @param p_adjust_method Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" - #' p-value adjustment method. #' @param p_transform Function to transform p-values for plotting. Default is `function(x) -log10(x)`. #' @param ylab Character: y-axis label. #' @param theme `Theme` object. #' @param col_pos Character: Color for positive significant coefficients. #' @param col_neg Character: Color for negative significant coefficients. #' @param alpha Numeric: Transparency level for the bars. #' @param ... Additional arguments passed to [draw_bar]. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # x: outcome of interest as first column, optional covariates in the other columns #' # y: features whose association with x we want to study #' set.seed(2022) #' y <- data.table(rnormmat(500, 40)) #' x <- data.table( #' x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500), #' x2 = y[[21]] + rnorm(500) #' ) #' massmod <- massGLM(x, y) #' plot_manhattan(massmod) plot_manhattan.MassGLM <- method(plot_manhattan, MassGLM) <- function( x, coefname = NULL, p_adjust_method = c( "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" ), p_transform = function(x) -log10(x), ylab = NULL, theme = choose_theme(getOption("rtemis_theme")), col_pos = "#43A4AC", col_neg = "#FA9860", alpha = 0.8, ... ) { p_adjust_method <- match.arg(p_adjust_method) if (is.null(coefname)) { coefname <- x@coefnames[1] } if (!coefname %in% x@coefnames) { stop( "coefname must be one of the coefnames available in the MassGLM object." ) } # y-axis label ---- if (is.null(ylab)) { ylab <- fn2label(p_transform, "p-value") ylab <- paste(ylab, "for", coefname) if (p_adjust_method != "none") { ylab <- paste0(ylab, " (", labelify(p_adjust_method), "-corrected)") } } # Plot ---- coefs <- x@summary[[paste0("Coefficient_", coefname)]] pvals <- x@summary[[paste0("p_value_", coefname)]] pvals <- p.adjust(pvals, method = p_adjust_method) signif_pos_idi <- pvals < 0.05 & coefs > 0 signif_neg_idi <- pvals < 0.05 & coefs < 0 col <- rep( adjustcolor(theme[["fg"]], alpha.f = alpha), length(pvals) ) col[signif_pos_idi] <- adjustcolor(col_pos, alpha.f = alpha) col[signif_neg_idi] <- adjustcolor(col_neg, alpha.f = alpha) draw_bar( x = p_transform(pvals), theme = theme, palette = col, group_names = x@ynames, ylab = ylab, ... ) } # /rtemis::plot_manhattan.MassGLM # %% summary.MassGLM ---- method(summary, MassGLM) <- function(object, ...) { object@summary } # /rtemis::summary.MassGLM ================================================ FILE: R/09_ClusteringConfig.R ================================================ # S7_ClusteringConfig.R # ::rtemis:: # 2025- EDG rtemis.org # %% ClusteringConfig ---- #' @title ClusteringConfig #' #' @description #' Clustering config class. #' #' @field algorithm Character: Algorithm name. #' @field config List: Algorithm-specific config. #' #' @author EDG #' @keywords internal #' @noRd ClusteringConfig <- new_class( name = "ClusteringConfig", properties = list( algorithm = class_character, config = class_list ) ) # /rtemis::ClusteringConfig # %% `$`.ClusteringConfig ---- # Make ClusteringConfig@config `$`-accessible method(`$`, ClusteringConfig) <- function(x, name) { x@config[[name]] } # %% `.DollarNames`.ClusteringConfig ---- # `$`-autocomplete ClusteringConfig@config ---- method(`.DollarNames`, ClusteringConfig) <- function(x, pattern = "") { all_names <- names(x@config) grep(pattern, all_names, value = TRUE) } # %% `[[`.ClusteringConfig ---- # Make ClusteringConfig@config `[[`-accessible method(`[[`, ClusteringConfig) <- function(x, index) { x@config[[index]] } # %% repr.ClusteringConfig ---- method(repr, ClusteringConfig) <- function( x, pad = 0L, output_type = NULL, ... ) { output_type <- get_output_type(output_type) out <- repr_S7name( paste(x@algorithm, "ClusteringConfig"), pad = pad, output_type = output_type ) paste0( out, repr_ls(props(x)[["config"]], pad = pad, output_type = output_type) ) } # /rtemis::repr.ClusteringConfig # %% print.ClusteringConfig ---- #' Print Method for ClusteringConfig #' #' @param x ClusteringConfig object. #' @param pad Integer: Left side padding. #' #' @return ClusteringConfig object, invisibly. #' #' @author EDG #' @keywords internal #' @noRd method(print, ClusteringConfig) <- function( x, pad = 0L, output_type = NULL, ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.ClusteringConfig # %% KMeansConfig ---- #' @title KMeansConfig #' #' @description #' ClusteringConfig subclass for K-means Clustering. #' #' @author EDG #' @keywords internal #' @noRd KMeansConfig <- new_class( name = "KMeansConfig", parent = ClusteringConfig, constructor = function(k, dist) { k <- clean_posint(k) check_inherits(dist, "character") new_object( ClusteringConfig( algorithm = "KMeans", config = list( k = k, dist = dist ) ) ) } ) # /rtemis::KMeansConfig # %% setup_KMeans ---- #' Setup KMeansConfig #' #' @param k Number of clusters. #' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'. #' #' @return KMeansConfig object. #' #' @author EDG #' @export #' #' @examples #' kmeans_config <- setup_KMeans(k = 4L, dist = "euclidean") #' kmeans_config setup_KMeans <- function(k = 3L, dist = c("euclidean", "manhattan")) { k <- clean_posint(k) dist <- match.arg(dist) KMeansConfig(k, dist) } # /rtemis::setup_KMeans # %% HardCLConfig ---- #' @title HardCLConfig #' #' @description #' ClusteringConfig subclass for HardCL Clustering. #' #' @author EDG #' @keywords internal #' @noRd HardCLConfig <- new_class( name = "HardCLConfig", parent = ClusteringConfig, constructor = function(k, dist) { k <- clean_posint(k) check_inherits(dist, "character") new_object( ClusteringConfig( algorithm = "HardCL", config = list( k = k, dist = dist ) ) ) } ) # /rtemis::HardCLConfig # %% setup_HardCL ---- #' Setup HardCLConfig #' #' @param k Number of clusters. #' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'. #' #' @return HardCLConfig object. #' #' @author EDG #' @export #' #' @examples #' hardcl_config <- setup_HardCL(k = 4L, dist = "euclidean") #' hardcl_config setup_HardCL <- function(k = 3L, dist = c("euclidean", "manhattan")) { k <- clean_posint(k) dist <- match.arg(dist) HardCLConfig(k, dist) } # /rtemis::setup_HardCL # %% NeuralGasConfig ---- #' @title NeuralGasConfig #' #' @description #' ClusteringConfig subclass for Neural Gas Clustering. #' #' @author EDG #' @keywords internal #' @noRd NeuralGasConfig <- new_class( name = "NeuralGasConfig", parent = ClusteringConfig, constructor = function(k, dist) { k <- clean_posint(k) check_inherits(dist, "character") new_object( ClusteringConfig( algorithm = "NeuralGas", config = list( k = k, dist = dist ) ) ) } ) # /rtemis::NeuralGasConfig # %% setup_NeuralGas ---- #' Setup NeuralGasConfig #' #' @param k Number of clusters. #' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'. #' #' @return NeuralGasConfig object. #' #' @author EDG #' @export #' #' @examples #' neuralgas_config <- setup_NeuralGas(k = 4L, dist = "euclidean") #' neuralgas_config setup_NeuralGas <- function(k = 3L, dist = c("euclidean", "manhattan")) { k <- clean_posint(k) dist <- match.arg(dist) NeuralGasConfig(k, dist) } # /rtemis::setup_NeuralGas # %% CMeansConfig ---- #' @title CMeansConfig #' #' @description #' ClusteringConfig subclass for CMeans Clustering. #' #' @author EDG #' @keywords internal #' @noRd CMeansConfig <- new_class( name = "CMeansConfig", parent = ClusteringConfig, constructor = function( k, max_iter, dist, method, m, rate_par, weights, control ) { k <- clean_posint(k) max_iter <- clean_posint(max_iter) check_character(dist) check_character(method) check_floatpos(m) check_float01inc(rate_par) check_inherits(weights, "numeric") check_inherits(control, "list") new_object( ClusteringConfig( algorithm = "CMeans", config = list( k = k, max_iter = max_iter, dist = dist, method = method, m = m, rate_par = rate_par, weights = weights, control = control ) ) ) } ) # /rtemis::CMeansConfig # %% setup_CMeans ---- #' Setup CMeansConfig #' #' @param k Integer: Number of clusters. #' @param max_iter Integer: Maximum number of iterations. #' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'. #' @param method Character: "cmeans" - fuzzy c-means clustering; "ufcl": on-line update. #' @param m Float (>1): Degree of fuzzification. #' @param rate_par Float (0, 1): Learning rate for the online variant. #' @param weights Float (>0): Case weights. #' @param control List: Control config for clustering algorithm. #' #' @return CMeansConfig object. #' #' @author EDG #' @export #' #' @examples #' cmeans_config <- setup_CMeans(k = 4L, dist = "euclidean") #' cmeans_config setup_CMeans <- function( k = 2L, max_iter = 100L, dist = c("euclidean", "manhattan"), method = c("cmeans", "ufcl"), m = 2.0, rate_par = NULL, weights = 1.0, control = list() ) { k <- clean_posint(k) max_iter <- clean_posint(max_iter) dist <- match.arg(dist) method <- match.arg(method) check_floatpos(m) stopifnot(m > 1) check_float01inc(rate_par) check_inherits(weights, "numeric") CMeansConfig( k = k, max_iter = max_iter, dist = dist, method = method, m = m, rate_par = rate_par, weights = weights, control = control ) } # /rtemis::setup_CMeans # %% DBSCANConfig ---- #' @title DBSCANConfig #' #' @description #' ClusteringConfig subclass for DBSCAN Clustering. #' #' @author EDG #' @keywords internal #' @noRd DBSCANConfig <- new_class( name = "DBSCANConfig", parent = ClusteringConfig, constructor = function( eps, min_points, weights, border_points, search, bucket_size, split_rule, approx ) { check_floatpos(eps) min_points <- clean_posint(min_points) check_inherits(weights, "numeric") check_inherits(border_points, "logical") check_inherits(search, "character") check_inherits(bucket_size, "integer") check_inherits(split_rule, "character") check_inherits(approx, "logical") new_object( ClusteringConfig( algorithm = "DBSCAN", config = list( eps = eps, min_points = min_points, weights = weights, border_points = border_points, search = search, bucket_size = bucket_size, split_rule = split_rule, approx = approx ) ) ) } ) # /rtemis::DBSCANConfig # %% setup_DBSCAN ---- #' Setup DBSCANConfig #' #' @param eps Float: Radius of neighborhood. #' @param min_points Integer: Minimum number of points in a neighborhood to form a cluster. #' @param weights Numeric vector: Weights for data points. #' @param border_points Logical: If TRUE, assign border points to clusters. #' @param search Character: Nearest neighbor search strategy: "kdtree", "linear", or "dist". #' @param bucket_size Integer: Size of buckets for k-dtree search. #' @param split_rule Character: Rule for splitting clusters: "SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR". #' @param approx Logical: If TRUE, use approximate nearest neighbor search. #' @return DBSCANConfig object. #' #' @author EDG #' @export #' #' @examples #' dbscan_config <- setup_DBSCAN(eps = 0.5, min_points = 5L) #' dbscan_config setup_DBSCAN <- function( eps = 0.5, min_points = 5L, weights = NULL, border_points = TRUE, search = c("kdtree", "linear", "dist"), bucket_size = 100L, split_rule = c("SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR"), approx = FALSE ) { check_floatpos(eps) min_points <- clean_posint(min_points) check_inherits(weights, "numeric") check_inherits(border_points, "logical") search <- match.arg(search) check_inherits(bucket_size, "integer") split_rule <- match.arg(split_rule) check_inherits(approx, "logical") DBSCANConfig( eps = eps, min_points = min_points, weights = weights, border_points = border_points, search = search, bucket_size = bucket_size, split_rule = split_rule, approx = approx ) } # /rtemis::setup_DBSCAN ================================================ FILE: R/10_Clustering.R ================================================ # S7_Clustering.R # ::rtemis:: # 2025- EDG rtemis.org # %% Clustering ---- #' @title Clustering #' #' @description #' Clustering class. #' #' @field algorithm Character: Algorithm name. #' @field clust Any: Clustering object. #' @field k Integer: Number of clusters. #' @field clusters List: Cluster assignment. #' @field config ClusteringConfig: Algorithm-specific config. #' #' @author EDG #' @noRd Clustering <- new_class( name = "Clustering", properties = list( algorithm = class_character, clust = class_any, k = class_integer, clusters = class_integer | class_list, config = ClusteringConfig ) ) # /Clustering # %% `$`.Clustering ---- # Make Clustering props `$`-accessible method(`$`, Clustering) <- function(x, name) { prop(x, name) } # %% `.DollarNames`.Clustering ---- # `$`-autocomplete Clustering props method(`.DollarNames`, Clustering) <- function(x, pattern = "") { prop_names <- names(props(x)) grep(pattern, prop_names, value = TRUE) } # %% `[[`.Clustering ---- # Make Clustering props `[[`-accessible method(`[[`, Clustering) <- function(x, index) { prop(x, index) } # %% repr.Clustering ---- method(repr, Clustering) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name(paste(x@algorithm, "Clustering")), repr_ls(props(x)[-1], pad = pad, output_type = output_type) ) } # /rtemis::repr.Clustering # %% print.Clustering ---- method(print, Clustering) <- function( x, pad = 0L, output_type = NULL, ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.Clustering ================================================ FILE: R/11_DecompositionConfig.R ================================================ # S7_DecompositionConfig.R # ::rtemis:: # 2025 EDG rtemis.org # %% DecompositionConfig ---- #' @title DecompositionConfig #' #' @description #' Decomposition config class. #' #' @field algorithm Character: Algorithm name. #' @field config List: Algorithm-specific config. #' #' @author EDG #' @noRd DecompositionConfig <- new_class( name = "DecompositionConfig", properties = list( algorithm = class_character, config = class_list ) ) # /DecompositionConfig # %% `$`.DecompositionConfig ---- # Make DecompositionConfig@config `$`-accessible ---- method(`$`, DecompositionConfig) <- function(x, name) { x@config[[name]] } # %% `.DollarNames`.DecompositionConfig ---- # `$`-autocomplete DecompositionConfig@config ---- method(`.DollarNames`, DecompositionConfig) <- function(x, pattern = "") { all_names <- names(x@config) grep(pattern, all_names, value = TRUE) } # %% `[`.DecompositionConfig ---- # Make props `[`-accessible ---- method(`[`, DecompositionConfig) <- function(x, name) { props(x)[[name]] } # %% `[[`.DecompositionConfig ---- # Make DecompositionConfig@config `[[`-accessible ---- method(`[[`, DecompositionConfig) <- function(x, name) { x@config[[name]] } # %% repr.DecompositionConfig ---- #' Show Method for DecompositionConfig #' #' @param object DecompositionConfig object. #' @param pad Integer: Left side padding. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return character #' #' @author EDG #' @noRd method(repr, DecompositionConfig) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name( paste(x["algorithm"], "DecompositionConfig"), pad = pad, output_type = output_type ), repr_ls(x["config"], pad = pad, limit = -1L, output_type = output_type) ) } # /rtemis::repr.DecompositionConfig # %% print.DecompositionConfig ---- #' Print Method for DecompositionConfig #' #' @param x DecompositionConfig object. #' @param pad Integer: Left side padding. #' @param ... Not used. #' #' @return DecompositionConfig object, invisibly. #' #' @author EDG #' @noRd method(print, DecompositionConfig) <- function( x, pad = 0L, output_type = NULL, ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # %% PCAConfig ---- #' @title PCAConfig #' #' @description #' DecompositionConfig subclass for Principal Component Analysis. #' Internal use only. #' #' @author EDG #' @noRd PCAConfig <- new_class( name = "PCAConfig", parent = DecompositionConfig, constructor = function(k, center, scale, tol) { k <- clean_posint(k) check_logical(center) check_logical(scale) check_float0pos(tol) new_object( DecompositionConfig( algorithm = "PCA", config = list( k = k, center = center, scale = scale, tol = tol ) ) ) } ) # /rtemis::PCAConfig # %% setup_PCA ---- #' Setup PCA config. #' #' @param k Integer: Number of components. (passed to `prcomp` `rank.`) #' @param center Logical: If TRUE, center the data. #' @param scale Logical: If TRUE, scale the data. #' @param tol Numeric: Tolerance. #' #' @return PCAConfig object. #' #' @author EDG #' @export #' #' @examples #' pca_config <- setup_PCA(k = 3L) #' pca_config setup_PCA <- function(k = 3L, center = TRUE, scale = TRUE, tol = NULL) { k <- clean_posint(k) check_logical(center) check_logical(scale) check_float0pos(tol) PCAConfig(k, center, scale, tol) } # /rtemis::setup_PCA # %% ICAConfig ---- #' @title ICAConfig #' #' @description #' DecompositionConfig subclass for Independent Component Analysis. #' Internal use only. #' #' @author EDG #' @noRd ICAConfig <- new_class( name = "ICAConfig", parent = DecompositionConfig, constructor = function(k, type, fun, alpha, row_norm, maxit, tol) { new_object( DecompositionConfig( algorithm = "ICA", config = list( k = k, type = type, fun = fun, alpha = alpha, row_norm = row_norm, maxit = maxit, tol = tol ) ) ) } ) # /rtemis::ICAConfig # %% setup_ICA ---- #' @title setup_ICA #' #' @description #' Setup ICA config. #' #' @param k Integer: Number of components. #' @param type Character: Type of ICA: "parallel" or "deflation". #' @param fun Character: ICA function: "logcosh", "exp". #' @param alpha Numeric \[1, 2\]: Used in approximation to neg-entropy with `fun = "logcosh"`. #' @param row_norm Logical: If TRUE, normalize rows of `x` before ICA. #' @param maxit Integer: Maximum number of iterations. #' @param tol Numeric: Tolerance. #' #' @return ICAConfig object. #' #' @author EDG #' @export #' #' @examples #' ica_config <- setup_ICA(k = 3L) #' ica_config setup_ICA <- function( k = 3L, type = c("parallel", "deflation"), fun = c("logcosh", "exp"), alpha = 1.0, row_norm = TRUE, maxit = 100L, tol = 1e-04 ) { k <- clean_posint(k) type <- match.arg(type) fun <- match.arg(fun) stopifnot(alpha >= 1, alpha <= 2) check_inherits(row_norm, "logical") maxit <- clean_posint(maxit) check_inherits(tol, "numeric") ICAConfig( k = k, type = type, fun = fun, alpha = alpha, row_norm = row_norm, maxit = maxit, tol = tol ) } # /rtemis::setup_ICA # %% NMFConfig ---- #' @title NMFConfig #' #' @description #' DecompositionConfig subclass for Non-negative Matrix Factorization. #' Internal use only. #' #' @author EDG #' @noRd NMFConfig <- new_class( name = "NMFConfig", parent = DecompositionConfig, constructor = function(k, method, nrun) { k <- clean_posint(k) check_inherits(method, "character") nrun <- clean_posint(nrun) new_object( DecompositionConfig( algorithm = "NMF", config = list( k = k, method = method, nrun = nrun ) ) ) } ) # /rtemis::NMFConfig # %% setup_NMF ---- #' Setup NMF config. #' #' @param k Integer: Number of components. #' @param method Character: NMF method. See `NMF::nmf`. #' @param nrun Integer: Number of runs to perform. #' #' @return NMFConfig object. #' #' @author EDG #' @export #' #' @examples #' nmf_config <- setup_NMF(k = 3L) #' nmf_config setup_NMF <- function( k = 2L, method = "brunet", nrun = if (length(k) > 1L) 30L else 1L ) { k <- clean_posint(k) check_inherits(method, "character") nrun <- clean_posint(nrun) NMFConfig(k, method, nrun) } # /rtemis::setup_NMF # %% UMAPConfig ---- #' @title UMAPConfig #' #' @description #' DecompositionConfig subclass for Uniform Manifold Approximation and Projection. #' Internal use only. #' #' @author EDG #' @noRd UMAPConfig <- new_class( name = "UMAPConfig", parent = DecompositionConfig, constructor = function( k, n_neighbors, init, metric, n_epochs, learning_rate, scale ) { k <- clean_posint(k) n_neighbors <- clean_posint(n_neighbors) check_inherits(init, "character") check_inherits(metric, "character") n_epochs <- clean_posint(n_epochs) check_float0pos(learning_rate) check_inherits(scale, "logical") new_object( DecompositionConfig( algorithm = "UMAP", config = list( k = k, n_neighbors = n_neighbors, init = init, metric = metric, n_epochs = n_epochs, learning_rate = learning_rate, scale = scale ) ) ) } ) # /rtemis::UMAPConfig # %% setup_UMAP ---- #' Setup UMAP config. #' #' @details #' A high `n_neighbors` value may give error in some systems: #' "Error in irlba::irlba(L, nv = n, nu = 0, maxit = iters) : #' function 'as_cholmod_sparse' not provided by package 'Matrix'" #' #' @param k Integer: Number of components. #' @param n_neighbors Integer: Number of keighbors. #' @param init Character: Initialization type. See `uwot::umap "init"`. #' @param metric Character: Distance metric to use: "euclidean", "cosine", #' "manhattan", "hamming", "categorical". #' @param n_epochs Integer: Number of epochs. #' @param learning_rate Float: Learning rate. #' @param scale Logical: If TRUE, scale input data before doing UMAP. #' #' @return UMAPConfig object. #' #' @author EDG #' @export #' #' @examples #' umap_config <- setup_UMAP(k = 3L) #' umap_config setup_UMAP <- function( k = 2L, n_neighbors = 15L, init = "spectral", metric = c("euclidean", "cosine", "manhattan", "hamming", "categorical"), n_epochs = NULL, learning_rate = 1.0, scale = TRUE ) { k <- clean_posint(k) n_neighbors <- clean_posint(n_neighbors) init <- match.arg(init) metric <- match.arg(metric) check_inherits(n_epochs, "integer") check_float0pos(learning_rate) check_inherits(scale, "logical") UMAPConfig( k = k, n_neighbors = n_neighbors, init = init, metric = metric, n_epochs = n_epochs, learning_rate = learning_rate, scale = scale ) } # /rtemis::setup_UMAP # %% tSNEConfig ---- #' @title tSNEConfig #' #' @description #' DecompositionConfig subclass for t-Distributed Stochastic Neighbor Embedding. #' #' @author EDG #' @noRd tSNEConfig <- new_class( name = "tSNEConfig", parent = DecompositionConfig, constructor = function( k = NULL, initial_dims = NULL, perplexity = NULL, theta = NULL, check_duplicates = NULL, pca = NULL, partial_pca = NULL, max_iter = NULL, verbose = NULL, is_distance = NULL, Y_init = NULL, pca_center = NULL, pca_scale = NULL, normalize = NULL, stop_lying_iter = NULL, mom_switch_iter = NULL, momentum = NULL, final_momentum = NULL, eta = NULL, exaggeration_factor = NULL, num_threads = NULL ) { k <- clean_posint(k) initial_dims <- clean_posint(initial_dims) check_logical(check_duplicates) check_logical(pca) check_logical(partial_pca) max_iter <- clean_posint(max_iter) check_logical(verbose) check_logical(is_distance) check_inherits(Y_init, "matrix") check_logical(pca_center) check_logical(pca_scale) check_logical(normalize) stop_lying_iter <- clean_posint(stop_lying_iter) mom_switch_iter <- clean_posint(mom_switch_iter) num_threads <- clean_posint(num_threads) new_object( DecompositionConfig( algorithm = "tSNE", config = list( k = k, initial_dims = initial_dims, perplexity = perplexity, theta = theta, check_duplicates = check_duplicates, pca = pca, partial_pca = partial_pca, max_iter = max_iter, verbose = verbose, is_distance = is_distance, Y_init = Y_init, pca_center = pca_center, pca_scale = pca_scale, normalize = normalize, stop_lying_iter = stop_lying_iter, mom_switch_iter = mom_switch_iter, momentum = momentum, final_momentum = final_momentum, eta = eta, exaggeration_factor = exaggeration_factor, num_threads = num_threads ) ) ) } ) # /rtemis::tSNEConfig # %% setup_tSNE ---- #' Setup tSNE config. #' #' @details #' Get more information on the config by running `?Rtsne::Rtsne`. #' #' @param k Integer: Number of components. #' @param initial_dims Integer: Initial dimensions. #' @param perplexity Integer: Perplexity. #' @param theta Float: Theta. #' @param check_duplicates Logical: If TRUE, check for duplicates. #' @param pca Logical: If TRUE, perform PCA. #' @param partial_pca Logical: If TRUE, perform partial PCA. #' @param max_iter Integer: Maximum number of iterations. #' @param verbose Logical: If TRUE, print messages. #' @param is_distance Logical: If TRUE, `x` is a distance matrix. #' @param Y_init Matrix: Initial Y matrix. #' @param pca_center Logical: If TRUE, center PCA. #' @param pca_scale Logical: If TRUE, scale PCA. #' @param normalize Logical: If TRUE, normalize. #' @param stop_lying_iter Integer: Stop lying iterations. #' @param mom_switch_iter Integer: Momentum switch iterations. #' @param momentum Float: Momentum. #' @param final_momentum Float: Final momentum. #' @param eta Float: Eta. #' @param exaggeration_factor Float: Exaggeration factor. #' @param num_threads Integer: Number of threads. #' #' @return tSNEConfig object. #' #' @author EDG #' @export #' #' @examples #' tSNE_config <- setup_tSNE(k = 3L) #' tSNE_config setup_tSNE <- function( k = 2L, initial_dims = 50L, perplexity = 30, theta = 0.5, check_duplicates = TRUE, pca = TRUE, partial_pca = FALSE, max_iter = 1000L, verbose = getOption("verbose", FALSE), is_distance = FALSE, Y_init = NULL, pca_center = TRUE, pca_scale = FALSE, normalize = TRUE, stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L), mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L), momentum = 0.5, final_momentum = 0.8, eta = 200, exaggeration_factor = 12, num_threads = 1L ) { tSNEConfig( k = k, initial_dims = initial_dims, perplexity = perplexity, theta = theta, check_duplicates = check_duplicates, pca = pca, partial_pca = partial_pca, max_iter = max_iter, verbose = verbose, is_distance = is_distance, Y_init = Y_init, pca_center = pca_center, pca_scale = pca_scale, normalize = normalize, stop_lying_iter = stop_lying_iter, mom_switch_iter = mom_switch_iter, momentum = momentum, final_momentum = final_momentum, eta = eta, exaggeration_factor = exaggeration_factor, num_threads = num_threads ) } # /rtemis::setup_tSNE # %% IsomapConfig ---- #' @title IsomapConfig #' #' @description #' DecompositionConfig subclass for Isomap. #' #' @author EDG #' @noRd IsomapConfig <- new_class( name = "IsomapConfig", parent = DecompositionConfig, constructor = function( k, dist_method = NULL, nsd = NULL, path = NULL ) { k <- clean_posint(k) check_inherits(dist_method, "character") nsd <- clean_int(nsd) check_inherits(path, "character") new_object( DecompositionConfig( algorithm = "Isomap", config = list( k = k, dist_method = dist_method, nsd = nsd, path = path ) ) ) } ) # /rtemis::IsomapConfig # %% setup_Isomap ---- #' Setup Isomap config. #' #' @param k Integer: Number of components. #' @param dist_method Character: Distance method. #' @param nsd Integer: Number of shortest dissimilarities retained. #' @param path Character: Path argument for `vegan::isomap`. #' #' @return IsomapConfig object. #' #' @author EDG #' @export #' #' @examples #' isomap_config <- setup_Isomap(k = 3L) #' isomap_config setup_Isomap <- function( k = 2L, dist_method = c("euclidean", "manhattan"), nsd = 0L, path = c("shortest", "extended") ) { k <- clean_posint(k) dist_method <- match.arg(dist_method) nsd <- clean_int(nsd) path <- match.arg(path) IsomapConfig(k, dist_method, nsd, path) } # /rtemis::setup_Isomap ================================================ FILE: R/12_Decomposition.R ================================================ # S7_Decomposition.R # ::rtemis:: # 2025 EDG rtemis.org # %% Decomposition ---- #' @title Decomposition #' #' @description #' Decomposition class. #' #' @field algorithm Character: Algorithm name. #' @field decom Any: Decomposition object. #' @field config List: Algorithm-specific config. #' @field decom: Decomposition model. #' @field transformed: transformedransformed data, i.e. either a projection or an embedding of the input data. #' #' @author EDG #' @noRd Decomposition <- new_class( name = "Decomposition", properties = list( algorithm = class_character, config = DecompositionConfig, decom = class_any, transformed = class_any ) ) # /rtemis::Decomposition # %% `$`.Decomposition ---- # Make Decomposition properties `$`-accessible method(`$`, Decomposition) <- function(x, name) { prop_names <- names(props(x)) if (name %in% prop_names) { prop(x, name) } else { cli::cli_abort(paste0( "No property named '", name, "' in Decomposition object." )) } } # %% `.DollarNames`.Decomposition ---- method(`.DollarNames`, Decomposition) <- function(x, pattern = "") { prop_names <- names(props(x)) grep(pattern, prop_names, value = TRUE) } # %% `[[`.Decomposition ---- # Make Decomposition@transformed `[[`-accessible method(`[[`, Decomposition) <- function(x, index) { props(x, "transformed")[[index]] } # %% repr.Decomposition ---- method(repr, Decomposition) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name( paste(x@algorithm, "Decomposition"), pad = pad, output_type = output_type ), repr_ls(props(x)[-1], pad = pad, output_type = output_type) ) } # /rtemis::repr.Decomposition # %% print.Decomposition ---- method(print, Decomposition) <- function( x, pad = 0L, output_type = NULL, ... ) { cat(repr(x, pad = pad, output_type = output_type)) invisible(x) } # /rtemis::print.Decomposition ================================================ FILE: R/13_Themes.R ================================================ # S7_Themes.R # ::rtemis:: # 2025 EDG rtemis.org # %% Theme ---- #' Theme #' #' @field name Character: Name of theme. #' @field config Named list of theme config. #' #' @author EDG #' @noRd Theme <- new_class( name = "Theme", properties = list( name = class_character, config = class_list ) ) # /Theme # %% print.Theme ---- #' Print Theme #' #' Print Theme object #' #' @param x `Theme` object. #' @param ... Not used. #' #' @author EDG #' @noRd method(print, Theme) <- function(x, ...) { objcat(paste(x@name, "Theme")) printls(props(x)[["config"]]) invisible(x) } # %% `$`.Theme ---- # Make Theme@config `$`-accessible with autocomplete ---- method(`$`, Theme) <- function(x, name) { x@config[[name]] } # /rtemis::Theme$ # %% `.DollarNames`.Theme ---- method(`.DollarNames`, Theme) <- function(x, pattern = "") { all_names <- names(x@config) grep(pattern, all_names, value = TRUE) } # /rtemis::Theme.DollarNames # %% `[[`.Theme ---- # Make Theme@config `[[`-accessible ---- method(`[[`, Theme) <- function(x, name) { x@config[[name]] } # /rtemis::Theme[[]] # %% names.Theme ---- #' Get names of Theme object #' #' @param x `Theme` object. #' #' @return Character vector of names of `Theme` object. #' #' @author EDG #' @noRd method(names, Theme) <- function(x) { names(x@config) } # /rtemis::names.Theme ================================================ FILE: R/14_SuperConfig.R ================================================ # S7_SuperConfig.R # ::rtemis:: # 2025- EDG rtemis.org # References ---- # https://github.com/RConsortium/S7 # https://rconsortium.github.io/S7/ # %% SuperConfig ---- #' SuperConfig Class #' #' @description #' Supervised Learning Configuration Class. #' #' @author EDG #' @noRd SuperConfig <- new_class( name = "SuperConfig", properties = list( dat_training_path = class_character, dat_validation_path = class_character | NULL, dat_test_path = class_character | NULL, weights = class_character | NULL, # column name in dat_training preprocessor_config = PreprocessorConfig | NULL, algorithm = class_character | NULL, hyperparameters = Hyperparameters | NULL, tuner_config = TunerConfig | NULL, outer_resampling_config = ResamplerConfig | NULL, execution_config = ExecutionConfig, question = class_character | NULL, outdir = class_character, verbosity = class_integer ) ) # /rtemis::SuperConfig # %% repr.SuperConfig ---- #' Repr SuperConfig #' #' @param x `SuperConfig` object. #' @param pad Integer: Number of spaces to pad the message with. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted string that can be printed with cat() #' #' @author EDG #' @noRd method(repr, SuperConfig) <- function(x, pad = 0L, output_type = NULL) { out <- repr_S7name("SuperConfig", pad = pad, output_type = output_type) out <- paste0( out, repr_ls(props(x), pad = pad, limit = 20L, output_type = output_type) ) out } # /rtemis::repr.SuperConfig # %% print.SuperConfig ---- #' Print `SuperConfig` #' #' Print `SuperConfig` object #' #' @param x `SuperConfig` object. #' @param ... Not used. #' #' @author EDG #' @noRd method(print, SuperConfig) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.SuperConfig # %% setup_SuperConfig ---- #' Setup SuperConfig #' #' Setup `SuperConfig` object. #' #' @param dat_training_path Character: Path to training data file. #' @param dat_validation_path Character: Path to validation data file. #' @param dat_test_path Character: Path to test data file. #' @param weights Optional Character: Column name in training data to use as observation weights. #' If NULL, no weights are used. #' @param preprocessor_config `PreprocessorConfig` object: Configuration for data preprocessing. #' @param algorithm Character: Algorithm to use for training. #' @param hyperparameters `Hyperparameters` object: Configuration for model hyperparameters. #' @param tuner_config `TunerConfig` object: Configuration for hyperparameter tuning. #' @param outer_resampling_config `ResamplerConfig` object: Configuration for outer res #' resampling during model training. #' @param execution_config `ExecutionConfig` object: Configuration for execution settings. Setup #' with [setup_ExecutionConfig]. #' @param question Character: Question to answer with the supervised learning analysis. #' @param outdir Character: Output directory for results. #' @param verbosity Integer: Verbosity level. #' #' @return `SuperConfig` object. #' #' @author EDG #' @export #' #' @examples #' sc <- setup_SuperConfig( #' dat_training_path = "train.csv", #' preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), #' algorithm = "LightRF", #' hyperparameters = setup_LightRF(), #' tuner_config = setup_GridSearch(), #' outer_resampling_config = setup_Resampler(), #' execution_config = setup_ExecutionConfig(), #' question = "Can we tell iris species apart given their measurements?", #' outdir = "models/" #' ) setup_SuperConfig <- function( dat_training_path, dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = NULL, algorithm = NULL, hyperparameters = NULL, tuner_config = NULL, outer_resampling_config = NULL, execution_config = setup_ExecutionConfig(), question = NULL, outdir = "results/", verbosity = 1L ) { # Sanitize paths for security dat_training_path <- sanitize_path(dat_training_path, must_exist = FALSE) if (!is.null(dat_validation_path)) { dat_validation_path <- sanitize_path( dat_validation_path, must_exist = FALSE ) } if (!is.null(dat_test_path)) { dat_test_path <- sanitize_path(dat_test_path, must_exist = FALSE) } outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any") SuperConfig( dat_training_path = dat_training_path, dat_validation_path = dat_validation_path, dat_test_path = dat_test_path, weights = weights, preprocessor_config = preprocessor_config, algorithm = algorithm, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampling_config = outer_resampling_config, execution_config = execution_config, question = question, outdir = outdir, verbosity = verbosity ) } # /setup_SuperConfig # %% to_toml.SuperConfig ---- #' Convert `SuperConfig` to TOML #' #' Convert `SuperConfig` object to TOML format for saving to file that can be read back in with #' `read_config()`. #' #' @param x `SuperConfig` object. #' #' @return Character: TOML string representation of the `SuperConfig` object. #' #' @author EDG #' @keywords internal #' @noRd method(to_toml, SuperConfig) <- function(x) { check_dependencies("toml") xl <- S7_to_list(props(x)) toml_with_meta(x, xl) } # /rtemis::to_toml.SuperConfig # %% write_toml.SuperConfig ---- #' @name #' write_toml #' #' @param x `SuperConfig` object. #' @param file Character: Path to output TOML file. #' @param overwrite Logical: If TRUE, overwrite existing file. #' @param verbosity Integer: Verbosity level. #' #' @return `SuperConfig` object, invisibly. #' #' @author EDG #' @rdname write_toml #' #' @examples #' x <- setup_SuperConfig( #' dat_training_path = "~/Data/iris.csv", #' dat_validation_path = NULL, #' dat_test_path = NULL, #' weights = NULL, #' preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), #' algorithm = "LightRF", #' hyperparameters = setup_LightRF(), #' tuner_config = setup_GridSearch(), #' outer_resampling_config = setup_Resampler(), #' execution_config = setup_ExecutionConfig(), #' question = "Can we tell iris species apart given their measurements?", #' outdir = "models/", #' verbosity = 1L #' ) #' tmpdir <- tempdir() #' write_toml(x, file.path(tmpdir, "rtemis.toml")) method(write_toml, SuperConfig) <- function( x, file, overwrite = FALSE, verbosity = 1L ) { toml_str <- to_toml(x) write_lines( toml_str, file = file, overwrite = overwrite, verbosity = verbosity ) invisible(x) } # /rtemis::write_toml.SuperConfig # %% read_config ---- #' Read `SuperConfig` from TOML file #' #' Read `SuperConfig` object from TOML file that was written with `write_toml()`. #' #' @param file Character: Path to input TOML file. #' #' @return `SuperConfig` object. #' #' @author EDG #' @export #' #' @examples #' # Create a SuperConfig object #' x <- setup_SuperConfig( #' dat_training_path = "~/Data/iris.csv", #' algorithm = "LightRF", #' hyperparameters = setup_LightRF() #' ) #' # Write TOML file #' tmpdir <- tempdir() #' tmpfile <- file.path(tmpdir, "rtemis_test.toml") #' write_toml(x, tmpfile) #' # Read config from TOML file #' x_read <- read_config(tmpfile) read_config <- function(file) { check_dependencies("toml") file <- sanitize_path(file, must_exist = TRUE, type = "file") xl <- toml::read_toml(file) xl <- toml_empty_to_null(xl) # Convert list to SuperConfig object setup_SuperConfig( dat_training_path = xl[["dat_training_path"]], dat_validation_path = xl[["dat_validation_path"]], dat_test_path = xl[["dat_test_path"]], weights = xl[["weights"]], preprocessor_config = if (is.null(xl[["preprocessor_config"]])) { NULL } else { do.call(setup_Preprocessor, xl[["preprocessor_config"]]) }, algorithm = xl[["algorithm"]], hyperparameters = if (is.null(xl[["hyperparameters"]])) { NULL } else { .list_to_Hyperparameters(xl[["hyperparameters"]]) }, tuner_config = if (is.null(xl[["tuner_config"]])) { NULL } else { .list_to_TunerConfig(xl[["tuner_config"]]) }, outer_resampling_config = if (is.null(xl[["outer_resampling_config"]])) { NULL } else { .list_to_ResamplerConfig(xl[["outer_resampling_config"]]) }, execution_config = if (is.null(xl[["execution_config"]])) { setup_ExecutionConfig() } else { do.call(setup_ExecutionConfig, xl[["execution_config"]]) }, question = iflengthy(xl[["question"]]), outdir = iflengthy(xl[["outdir"]]), verbosity = iflengthy(xl[["verbosity"]]) ) } # /rtemis::read_config # %% to_yaml.SuperConfig ---- #' Convert `SuperConfig` to YAML #' #' Convert `SuperConfig` object to YAML format for saving to file that can be read back in with #' `read_config()`. #' #' @param x `SuperConfig` object. #' #' @return Character: YAML string representation of the `SuperConfig` object. #' #' @author EDG #' @keywords internal #' @noRd method(to_yaml, SuperConfig) <- function(x) { xl <- S7_to_list(props(x)) yaml::as.yaml(xl) } # /rtemis::to_yaml.SuperConfig # %% SuperConfigLive ---- #' SuperConfigLive Class #' #' @description #' Like `SuperConfig`, but carries in-memory training/validation/test data #' instead of file paths. Used by `rtemislive` (uploads arrive over a WS #' frame, not as a file) and by future HPC submission paths that hand the #' data directly to a worker. #' #' Not TOML-serialisable — in-memory data does not round-trip cleanly to #' a config file. Use `SuperConfig` when you need on-disk reproducibility. #' #' @author EDG #' @noRd SuperConfigLive <- new_class( name = "SuperConfigLive", properties = list( dat_training = class_tabular, dat_validation = class_tabular | NULL, dat_test = class_tabular | NULL, weights = class_character | NULL, # column name in dat_training preprocessor_config = PreprocessorConfig | NULL, algorithm = class_character | NULL, hyperparameters = Hyperparameters | NULL, tuner_config = TunerConfig | NULL, outer_resampling_config = ResamplerConfig | NULL, execution_config = ExecutionConfig, question = class_character | NULL, outdir = class_character | NULL, verbosity = class_integer ) ) # /rtemis::SuperConfigLive # %% repr.SuperConfigLive ---- #' @author EDG #' @noRd method(repr, SuperConfigLive) <- function(x, pad = 0L, output_type = NULL) { out <- repr_S7name("SuperConfigLive", pad = pad, output_type = output_type) # Replace heavy data slots with a {rows, cols} summary so the printout # stays readable. pl <- props(x) fmt_dim <- function(d) { if (is.null(d)) { return(NULL) } paste0("<", NROW(d), " x ", NCOL(d), ">") } pl[["dat_training"]] <- fmt_dim(pl[["dat_training"]]) pl[["dat_validation"]] <- fmt_dim(pl[["dat_validation"]]) pl[["dat_test"]] <- fmt_dim(pl[["dat_test"]]) out <- paste0( out, repr_ls(pl, pad = pad, limit = 20L, output_type = output_type) ) out } # /rtemis::repr.SuperConfigLive # %% print.SuperConfigLive ---- #' @author EDG #' @noRd method(print, SuperConfigLive) <- function(x, output_type = NULL, ...) { cat(repr(x, output_type = output_type)) invisible(x) } # /rtemis::print.SuperConfigLive # %% setup_SuperConfigLive ---- #' Setup SuperConfigLive #' #' Build a `SuperConfigLive` — same shape as [setup_SuperConfig] but with #' in-memory tabular data instead of file paths. #' #' @param dat_training data.frame or data.table. Training data. #' @param dat_validation data.frame, data.table, or `NULL`. #' @param dat_test data.frame, data.table, or `NULL`. #' @param weights Character or `NULL`. Column name in `dat_training` used #' as observation weights. #' @param preprocessor_config,algorithm,hyperparameters,tuner_config,outer_resampling_config,execution_config,question,verbosity #' See [setup_SuperConfig]. #' @param outdir Character or `NULL`. Output directory; `NULL` (the #' default) means "do not write to disk" (the rtemislive case). #' #' @return `SuperConfigLive` object. #' #' @author EDG #' @export setup_SuperConfigLive <- function( dat_training, dat_validation = NULL, dat_test = NULL, weights = NULL, preprocessor_config = NULL, algorithm = NULL, hyperparameters = NULL, tuner_config = NULL, outer_resampling_config = NULL, execution_config = setup_ExecutionConfig(), question = NULL, outdir = NULL, verbosity = 1L ) { if (!is.null(outdir)) { outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any") } SuperConfigLive( dat_training = dat_training, dat_validation = dat_validation, dat_test = dat_test, weights = weights, preprocessor_config = preprocessor_config, algorithm = algorithm, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampling_config = outer_resampling_config, execution_config = execution_config, question = question, outdir = outdir, verbosity = as.integer(verbosity) ) } # /rtemis::setup_SuperConfigLive ================================================ FILE: R/15_CheckData.R ================================================ # CheckData.R # ::rtemis:: # 2025- EDG rtemis.org # %% CheckData ---- #' @author EDG #' @noRd CheckData <- new_class( name = "CheckData", properties = list( object_class = class_character, name = class_character, n_rows = class_integer, n_cols = class_integer, n_numeric = class_integer, n_integer = class_integer, n_character = class_integer, n_factor = class_integer, n_ordered = class_integer, n_date = class_integer, n_constant = class_integer, n_duplicates = class_integer, n_cols_anyna = class_integer, n_na = class_integer, classes_na = class_any | NULL, na_feature_pct = class_data.frame | NULL, na_case_pct = class_data.frame | NULL, n_na_last_col = class_integer | NULL ), constructor = function( object_class, name, n_rows, n_cols, n_numeric, n_integer, n_character, n_factor, n_ordered, n_date, n_constant, n_duplicates, n_cols_anyna, n_na, classes_na = NULL, na_feature_pct = NULL, na_case_pct = NULL, n_na_last_col = NULL ) { n_rows <- clean_int(n_rows) n_cols <- clean_int(n_cols) n_numeric <- clean_int(n_numeric) n_integer <- clean_int(n_integer) n_character <- clean_int(n_character) n_factor <- clean_int(n_factor) n_ordered <- clean_int(n_ordered) n_date <- clean_int(n_date) n_constant <- clean_int(n_constant) n_duplicates <- clean_int(n_duplicates) n_cols_anyna <- clean_int(n_cols_anyna) n_na <- clean_int(n_na) check_inherits(na_feature_pct, "data.frame") check_inherits(na_case_pct, "data.frame") n_na_last_col <- clean_int(n_na_last_col) new_object( S7_object(), object_class = object_class, name = name, n_rows = n_rows, n_cols = n_cols, n_numeric = n_numeric, n_integer = n_integer, n_character = n_character, n_factor = n_factor, n_ordered = n_ordered, n_date = n_date, n_constant = n_constant, n_duplicates = n_duplicates, n_cols_anyna = n_cols_anyna, n_na = n_na, classes_na = classes_na, na_feature_pct = na_feature_pct, na_case_pct = na_case_pct, n_na_last_col = n_na_last_col ) } ) # /rtemis::CheckData # %% `$`.CheckData ---- # Make CheckData properties `$`-accessible method(`$`, CheckData) <- function(x, name) { prop(x, name) } # /rtemis::`$`.CheckData # %% `.DollarNames`.CheckData ---- # `$`-autocomplete CheckData properties method(`.DollarNames`, CheckData) <- function(x, pattern = "") { all_names <- names(x) grep(pattern, all_names, value = TRUE) } # /rtemis::`.DollarNames`.CheckData # %% `[[`.CheckData ---- # Make CheckData properties `[[`-accessible method(`[[`, CheckData) <- function(x, name) { prop(x, name) } # /rtemis::`[[`.CheckData # %% repr.CheckData ---- #' Repr method for CheckData #' #' @param x CheckData object. #' #' @return Character: String representation of CheckData object. #' #' @author EDG #' @keywords internal #' @noRd method(repr, CheckData) <- function( x, name = NULL, check_integers = FALSE, pad = 0L, output_type = NULL ) { out <- repr_S7name(x) if (is.null(name)) { name <- x[["name"]] if (is.null(name)) name <- deparse(substitute(x)) } n_rows <- x[["n_rows"]] n_cols <- x[["n_cols"]] n_numeric <- x[["n_numeric"]] n_integer <- x[["n_integer"]] n_character <- x[["n_character"]] n_factor <- x[["n_factor"]] n_ordered <- x[["n_ordered"]] n_date <- x[["n_date"]] n_constant <- x[["n_constant"]] n_duplicates <- x[["n_duplicates"]] n_cols_anyna <- x[["n_cols_anyna"]] n_na <- x[["n_na"]] n_na_last_col <- x[["n_na_last_col"]] ## Object class and dimensions ---- out <- paste0( " ", highlight(name, pad = pad, output_type = output_type), paste( ": A", x[["object_class"]], "with", highlight( format(n_rows, trim = TRUE, big.mark = ",", scientific = FALSE), pad = pad, output_type = output_type ), ngettext(n_rows, "row", "rows"), "and", highlight( format(n_cols, trim = TRUE, big.mark = ",", scientific = FALSE), pad = pad, output_type = output_type ), ngettext(n_cols, "column.", "columns.") ) ) ## Data Types ---- isOrdered <- if (n_factor == 1) { paste(", which", ngettext(n_ordered, "is", "is not"), "ordered") } else if (n_factor > 1) { paste( ", of which", fmt(n_ordered, bold = TRUE, pad = pad, output_type = output_type), ngettext(n_ordered, "is", "are"), "ordered" ) } else { "" } out <- paste( out, fmt("\n Data types", bold = TRUE, pad = pad, output_type = output_type), paste( " *", fmt(n_numeric, bold = TRUE, pad = pad, output_type = output_type), "numeric", ngettext(n_numeric, "feature", "features") ), paste( " *", fmt(n_integer, bold = TRUE, pad = pad, output_type = output_type), "integer", ngettext(n_integer, "feature", "features") ), paste0( " * ", fmt(n_factor, bold = TRUE, pad = pad, output_type = output_type), ngettext(n_factor, " factor", " factors"), isOrdered ), paste( " *", fmt(n_character, bold = TRUE, pad = pad, output_type = output_type), "character", ngettext(n_character, "feature", "features") ), paste( " *", fmt(n_date, bold = TRUE, pad = pad, output_type = output_type), "date", ngettext(n_date, "feature", "features") ), sep = "\n" ) ## Issues ---- out <- paste( out, fmt("\n Issues", bold = TRUE, pad = pad, output_type = output_type), sep = "\n" ) out <- paste( out, paste( " *", fmt( n_constant, col = if (n_constant > 0) rt_red else NULL, bold = TRUE, pad = pad, output_type = output_type ), "constant", ngettext(n_constant, "feature", "features") ), sep = "\n" ) out <- paste( out, paste( " *", fmt( n_duplicates, col = if (n_duplicates > 0) rt_orange else NULL, bold = TRUE, pad = pad, output_type = output_type ), "duplicate", ngettext(n_duplicates, "case", "cases") ), sep = "\n" ) nas <- if (n_cols_anyna > 0) { classes_na <- x[["classes_na"]] .col <- if (n_cols_anyna > 0) rt_orange else NULL out_nas <- paste( fmt( n_cols_anyna, col = .col, bold = TRUE, pad = pad, output_type = output_type ), ngettext(n_cols_anyna, "feature includes", "features include"), "'NA' values;", fmt(n_na, col = .col, bold = TRUE, pad = pad, output_type = output_type), "'NA'", ngettext(n_na, "value", "values"), "total\n *", paste0( mapply( function(val, name) { paste( fmt( val, col = .col, bold = TRUE, pad = pad, output_type = output_type ), tolower(name) ) }, classes_na, names(classes_na) ), collapse = "; " ) ) if (n_na_last_col > 0) { out_nas <- paste( out_nas, paste0( "\n * ", fmt( n_na_last_col, col = .col, bold = TRUE, pad = pad, output_type = output_type ), ngettext(n_na_last_col, " missing value", " missing values"), " in the last column" ) ) } out_nas } else { paste( fmt("0", bold = TRUE, pad = pad, output_type = output_type), "missing values" ) } out <- paste0(out, "\n * ", nas) ## Recommendations ---- out <- paste( out, fmt( "\n Recommendations", bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) if (sum(n_character, n_constant, n_duplicates, n_cols_anyna) > 0) { if (n_character > 0) { out <- paste( out, fmt( " * Consider converting character features to factors or excluding them.", col = rt_orange, bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) } if (n_constant > 0) { out <- paste( out, fmt( (paste( " * Remove the constant", ngettext(n_constant, "feature.", "features.") )), col = rt_red, bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) } if (n_duplicates > 0) { out <- paste( out, fmt( paste( " * Consider removing the duplicate", ngettext(n_duplicates, "case.", "cases.") ), col = rt_orange, bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) } if (n_cols_anyna > 0) { out <- paste( out, fmt( paste( " * Consider using algorithms that can handle missingness or imputing missing values." ), col = rt_blue, bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) # Note regarding missing values in last column if (n_na_last_col > 0) { out <- paste( out, fmt( "\n * Filter cases with missing values in the last column if using dataset for supervised learning.\n", col = rt_orange, bold = TRUE, pad = pad, output_type = output_type ) ) } } if (check_integers && n_integer > 0) { out <- paste( out, paste0( " * Check the", ifelse(n_integer > 1, paste("", n_integer, ""), " "), "integer", ngettext(n_integer, " feature", " features"), " and consider if", ngettext(n_integer, " it", " they"), " should be converted to ", ngettext(n_integer, "factor", "factors") ), sep = "\n" ) } } else { out <- paste( out, fmt( " * Everything looks good", col = rt_green, bold = TRUE, pad = pad, output_type = output_type ), sep = "\n" ) } paste0(out, "\n") } # /rtemis::repr.CheckData # %% print.CheckData ---- method(print, CheckData) <- function( x, name = NULL, check_integers = FALSE, output_type = NULL, ... ) { cat(repr( x, name = name, check_integers = check_integers, output_type = output_type )) invisible(x) } # /rtemis::print.CheckData ================================================ FILE: R/16_S7utils.R ================================================ # S7_utils # ::rtemis:: # 2025 EDG rtemis.org # %% SuperWorkers ---- #' @keywords internal #' @noRd SuperWorkers <- new_class( name = "SuperWorkers", properties = list( algorithm = class_character, max_workers = class_integer, max_workers_algorithm = class_integer, max_workers_tuning = class_integer, max_workers_resampling = class_integer ), constructor = function( algorithm, max_workers, max_workers_algorithm, max_workers_tuning, max_workers_resampling ) { max_workers <- clean_posint(max_workers) max_workers_algorithm <- clean_posint(max_workers_algorithm) max_workers_tuning <- clean_posint(max_workers_tuning) max_workers_resampling <- clean_posint(max_workers_resampling) # Validate input if ( max_workers_algorithm + max_workers_tuning + max_workers_resampling > max_workers ) { cli::cli_abort( "Total workers for algorithm, tuning, and resampling cannot exceed max_workers." ) } new_object( S7_object(), algorithm = algorithm, max_workers = max_workers, max_workers_algorithm = max_workers_algorithm, max_workers_tuning = max_workers_tuning, max_workers_resampling = max_workers_resampling ) } ) # /rtemis::SuperWorkers # %% BiasVariance ---- BiasVariance <- new_class( name = "BiasVariance", properties = list( bias_squared = class_numeric, mean_bias_squared = class_numeric, sd_bias_squared = class_numeric, variance = class_numeric, mean_variance = class_numeric, sd_variance = class_numeric ) ) # %% `[[`.BiasVariance ---- # Make BiasVariance props `[[`- accessible ---- method(`[[`, BiasVariance) <- function(x, name) { prop(x, name) } # %% repr.BiasVariance ---- method(repr, BiasVariance) <- function( x, pad = 0L, output_type = NULL ) { output_type <- get_output_type(output_type) paste0( repr_S7name("BiasVariance"), "Mean squared bias: ", highlight(ddSci(x[["mean_bias_squared"]]), output_type = output_type), " (", ddSci(x[["sd_bias_squared"]]), ")\n", "Mean variance: ", highlight( ddSci(x[["mean_variance"]]), output_type = output_type ), " (", ddSci(x[["sd_variance"]]), ")\n" ) } # /rtemis::repr.BiasVariance # %% print.BiasVariance ---- #' Print method for BiasVariance #' #' @param x BiasVariance object. #' @param ... Not used. #' #' @return `x`, invisibly. #' #' @author EDG #' @noRd method(print, BiasVariance) <- function(x, ...) { cat(repr(x)) invisible(x) } # /rtemis::print.BiasVariance ================================================ FILE: R/algorithmDB.R ================================================ # algorithmDB.R # ::rtemis:: # 2025 EDG rtemis.org # Supervised Learning ---- supervised_algorithms <- data.frame(rbind( c("CART", "Classification and Regression Trees", TRUE, TRUE, TRUE), c("GAM", "Generalized Additive Model", TRUE, TRUE, FALSE), c("GLM", "Generalized Linear Model", TRUE, TRUE, FALSE), c("GLMNET", "Elastic Net", TRUE, TRUE, TRUE), c("Isotonic", "Isotonic Regression", TRUE, TRUE, FALSE), c("LightCART", "Decision Tree", TRUE, TRUE, FALSE), c("LightGBM", "Gradient Boosting", TRUE, TRUE, FALSE), c("LightRF", "LightGBM Random Forest", TRUE, TRUE, FALSE), c("LightRuleFit", "LightGBM RuleFit", TRUE, TRUE, FALSE), c("Ranger", "Random Forest", TRUE, TRUE, FALSE), c( "LinearSVM", "Support Vector Machine with Linear Kernel", TRUE, TRUE, FALSE ), c( "RadialSVM", "Support Vector Machine with Radial Kernel", TRUE, TRUE, FALSE ), c("TabNet", "Attentive Interpretable Tabular Learning", TRUE, TRUE, FALSE) )) colnames(supervised_algorithms) <- c( "Name", "Description", "Class", "Reg", "Surv" ) supervised_multiclass <- c( "GLMNET", "CART", "LightCART", "LightRF", "LightGBM", "LinearSVM", "RadialSVM", "Ranger" ) get_alg_name <- function(algorithm) { algname <- supervised_algorithms[, 1][ tolower(algorithm) == tolower(supervised_algorithms[, 1]) ] if (length(algname) == 0) { cli::cli_abort(algorithm, "Incorrect algorithm specified") } algname } get_alg_setup <- function(algorithm) { paste0("setup_", get_alg_name(algorithm)) } #' Get algorithm description #' #' @param algorithm Character: Algorithm name. #' #' @return Character: Algorithm description. #' #' @author EDG #' #' @keywords internal #' @noRd desc_alg <- function(algorithm) { algdesc <- supervised_algorithms[, 2][ tolower(algorithm) == tolower(supervised_algorithms[, 1]) ] if (length(algdesc) == 0) { cli::cli_abort(algorithm, "Incorrect algorithm specified") } algdesc } # /rtemis::desc_alg #' Algorithm description with short name #' #' @param algorithm Character: Algorithm name. #' #' @return Character: Algorithm description with short name in parentheses. #' #' @author EDG #' #' @keywords internal #' @noRd desc_abb_alg <- function(algorithm) { paste0( desc_alg(algorithm), " (", get_alg_name(algorithm), ")" ) } # /rtemis::desc_abb_alg get_train_fn <- function(algorithm) { paste0("train_", get_alg_name(algorithm)) } # /rtemis::get_train_fn get_default_hyperparameters <- function(algorithm, type, ncols) { alg_name <- get_alg_name(algorithm) if (alg_name == "LightRF") { setup_LightRF( feature_fraction = if (type == "Classification") { sqrt(ncols) / ncols } else { 0.33 } ) } else { do.call(paste0("setup_", get_alg_name(algorithm)), list()) } } # /rtemis::get_default_hyperparameters # use e.g. in draw_scatter setup_alg <- function(algorithm, ...) { alg_name <- get_alg_name(algorithm) setup_fn <- get_alg_setup(algorithm) do_call(setup_fn, list(...)) } # /rtemis::setup_alg # Clustering ---- clust_algorithms <- data.frame(rbind( c("CMeans", "Fuzzy C-means Clustering"), c("DBSCAN", "Density-based spatial clustering of applications with noise"), # c("EMC", "Expectation Maximization Clustering"), c("HardCL", "Hard Competitive Learning"), # c("HOPACH", "Hierarchical Ordered Partitioning And Collapsing Hybrid"), # c("H2OKMeans", "H2O K-Means Clustering"), c("KMeans", "K-Means Clustering"), # c("MeanShift", "Mean Shift Clustering"), c("NeuralGas", "Neural Gas Clustering") # c("PAM", "Partitioning Around Medoids"), # c("PAMK", "Partitioning Around Medoids with k estimation"), # c("SPEC", "Spectral Clustering") )) get_clust_name <- function(algorithm) { clustname <- clust_algorithms[, 1][ tolower(algorithm) == tolower(clust_algorithms[, 1]) ] if (length(clustname) == 0) { cli::cli_abort(algorithm, "Incorrect clustering algorithm specified") } clustname } # /rtemis::get_clust_name get_clust_desc <- function(algorithm) { clustdesc <- clust_algorithms[, 2][ tolower(algorithm) == tolower(clust_algorithms[, 1]) ] if (length(clustdesc) == 0) { cli::cli_abort(algorithm, "Incorrect clustering algorithm specified") } clustdesc } # /rtemis::get_clust_desc get_clust_fn <- function(algorithm) { paste0("cluster_", get_clust_name(algorithm)) } # /rtemis::get_clust_fn get_default_clusterparams <- function(algorithm) { do.call(paste0("setup_", get_clust_name(algorithm)), list()) } get_clustpredict_fn <- function(algorithm) { paste0("clustpredict_", get_clust_name(algorithm)) } get_clust_setup_fn <- function(algorithm) { paste0("setup_", get_clust_name(algorithm)) } # /rtemis::get_clust_setup_fn # Decomposition ---- decom_algorithms <- data.frame(rbind( # c("H2OAE", "H2O Autoencoder"), # c("H2OGLRM", "H2O Generalized Low-Rank Model"), c("ICA", "Independent Component Analysis"), c("Isomap", "Isomap"), # c("KPCA", "Kernel Principal Component Analysis"), # c("LLE", "Locally Linear Embedding"), # c("MDS", "Multidimensional Scaling"), c("NMF", "Non-negative Matrix Factorization"), c("PCA", "Principal Component Analysis"), # c("SPCA", "Sparse Principal Component Analysis"), # c("SVD", "Singular Value Decomposition"), c("tSNE", "t-distributed Stochastic Neighbor Embedding"), c("UMAP", "Uniform Manifold Approximation and Projection") )) get_decom_name <- function(algorithm) { decomname <- decom_algorithms[, 1][ tolower(algorithm) == tolower(decom_algorithms[, 1]) ] if (length(decomname) == 0) { cli::cli_abort(algorithm, "Incorrect decomposition algorithm specified") } decomname } # /rtemis::get_decom_name get_decom_desc <- function(algorithm) { decomdesc <- decom_algorithms[, 2][ tolower(algorithm) == tolower(decom_algorithms[, 1]) ] if (length(decomdesc) == 0) { cli::cli_abort(algorithm, "Incorrect decomposition algorithm specified") } decomdesc } # /rtemis::get_decom_desc get_decom_fn <- function(algorithm) { paste0("decom_", get_decom_name(algorithm)) } # /rtemis::get_decom_fn get_default_decomparams <- function(algorithm) { do.call(paste0("setup_", get_decom_name(algorithm)), list()) } # /rtemis::get_default_decomparams get_decom_setup_fn <- function(algorithm) { paste0("setup_", get_decom_name(algorithm)) } # /rtemis::get_decom_setup_fn get_decom_predict_fn <- function(algorithm) { paste0("predict_", get_decom_name(algorithm)) } # /rtemis::get_decom_predict_fn #' Available Algorithms #' #' Print available algorithms for supervised learning, clustering, and decomposition. #' #' @rdname available_algorithms #' @aliases available_algorithms #' #' @param verbosity Integer: Verbosity level. #' @return Named list of algorithm descriptions, invisibly. #' #' @author EDG #' @export #' #' @examples #' available_supervised() available_supervised <- function(verbosity = 1L) { algs <- structure( supervised_algorithms[, 2], names = supervised_algorithms[, 1], class = "list" ) if (verbosity > 0L) { printls(algs, print_class = FALSE, limit = -1L) } invisible(algs) } #' @rdname available_algorithms #' @export #' #' @examples #' available_clustering() available_clustering <- function(verbosity = 1L) { algs <- structure( clust_algorithms[, 2], names = clust_algorithms[, 1], class = "list" ) if (verbosity > 0L) { printls(algs, print_class = FALSE, limit = -1L) } invisible(algs) } #' @rdname available_algorithms #' @export #' #' @examples #' available_decomposition() available_decomposition <- function(verbosity = 1L) { algs <- structure( decom_algorithms[, 2], names = decom_algorithms[, 1], class = "list" ) if (verbosity > 0L) { printls(algs, print_class = FALSE, limit = -1L) } invisible(algs) } # Draw ---- draw_fns <- data.frame( rbind( c("draw_3DScatter", "3D Scatter Plot"), c("draw_bar", "Bar Plot"), c("draw_box", "Box Plot"), c("draw_calibration", "Calibration Plot"), c("draw_confusion", "Confusion Matrix"), c("draw_dist", "Density and Histogram Plots"), c("draw_fit", "Scatter Plot with Fit Line alias"), c("draw_graphD3", "Network Graph using networkD3"), c("draw_graphjs", "Network Graph using graphjs"), c("draw_heat", "Heatmap using plotly"), c("draw_heatmap", "Heatmap using heatmaply"), c("draw_leafleat", "Choropleth Map using leaflet"), c("draw_pie", "Pie Chart"), c("draw_protein", "Amino Acid Annotation Plot"), c("draw_roc", "ROC Curve"), c("draw_scatter", "Scatter Plot"), c("draw_spectrogram", "Spectrogram"), c("draw_table", "Table using plotly"), c("draw_ts", "Time Series Plot"), c("draw_varimp", "Barplot for Variable Importance alias"), c("draw_volcano", "Volcano Plot"), c("draw_xt", "Time Series Line Plot") ) ) colnames(draw_fns) <- c("Function Name", "Description") #' Available Draw Functions #' #' Print available draw functions for visualization. #' #' @param verbosity Integer: Verbosity level. #' #' @return Named list of draw function descriptions, invisibly. #' #' @author EDG #' @export #' #' @examples #' available_draw() available_draw <- function(verbosity = 1L) { fns <- structure( draw_fns[, 2], names = draw_fns[, 1], class = "list" ) if (verbosity > 0L) { cat("Available draw functions:\n") printls(fns, print_class = FALSE, limit = -1L) } invisible(fns) } # /rtemis::available_draw ================================================ FILE: R/calibrate.R ================================================ # calibrate.R # ::rtemis:: # 2025 EDG rtemis.org # %% calibrate.Classification ---- #' Calibrate Binary Classification Models #' #' @description #' The goal of calibration is to adjust the predicted probabilities of a binary classification #' model so that they better reflect the true probabilities (i.e. empirical risk) of the positive #' class. #' #' @details #' Important: The calibration model's training data should be different from the classification #' model's training data. #' #' @param x `Classification` object. #' @param predicted_probabilities Numeric vector: Predicted probabilities. #' @param true_labels Factor: True class labels. #' @param algorithm Character: Algorithm to use to train calibration model. #' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions. #' @param verbosity Integer: Verbosity level. #' @param ... Not used #' #' @return `CalibratedClassification` object. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' datc2 <- data.frame( #' gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)), #' iris[51:150, ] #' ) #' res <- resample(datc2) #' datc2$Species <- factor(datc2$Species) #' datc2_train <- datc2[res[[1]], ] #' datc2_test <- datc2[-res[[1]], ] #' mod_c_glm <- train( #' x = datc2_train, #' dat_test = datc2_test, #' algorithm = "glm" #' ) #' mod_c_glm_cal <- calibrate( #' mod_c_glm, #' predicted_probabilities = mod_c_glm$predicted_prob_training, #' true_labels = mod_c_glm$y_training #' ) #' mod_c_glm_cal method(calibrate, Classification) <- function( x, predicted_probabilities, true_labels, algorithm = "isotonic", hyperparameters = NULL, verbosity = 1L, ... ) { # Check inputs check_float01inc(predicted_probabilities) check_inherits(true_labels, "factor") # Training data is whatever is passed by user dat <- data.table(predicted_probabilities, true_labels) # Test data is taken from mod, if available if (!is.null(x@y_test) && !is.null(x@predicted_prob_test)) { dat_test <- data.table( predicted_probabilities = x@predicted_prob_test, true_labels = x@y_test ) } else { dat_test <- NULL } # Calibration model if (verbosity > 0L) { msg( fmt("<>", col = col_calibrator, bold = TRUE), "Calibrating", x@algorithm, "classification..." ) } cal_model <- train( dat, dat_test = dat_test, algorithm = algorithm, hyperparameters = hyperparameters, verbosity = verbosity ) mod_cal <- CalibratedClassification(x, cal_model) if (verbosity > 0L) { message() print(mod_cal) message() } if (verbosity > 0L) { msg(fmt("", col = col_calibrator, bold = TRUE), "Calibration done.") } mod_cal } # /rtemis::calibrate # %% calibrate.ClassificationRes ---- #' Calibrate Resampled Classification Models #' #' @param x `ClassificationRes` object. #' @param algorithm Character: Algorithm to use to train calibration model. #' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions. #' @param resampler_config `ResamplerConfig` object: Configuration for resampling during calibration model training. #' @param train_verbosity Integer: Verbosity level for training calibration models. #' @param verbosity Integer: Verbosity level. #' @param ... Not used #' #' @return `CalibratedClassificationRes` object. #' @author EDG #' @keywords internal #' @noRd method(calibrate, ClassificationRes) <- function( x, algorithm = "isotonic", hyperparameters = NULL, resampler_config = setup_Resampler( n_resamples = 5L, type = "KFold" ), train_verbosity = 0L, verbosity = 1L, ... ) { # Check inputs check_inherits(algorithm, "character") check_is_S7(resampler_config, ResamplerConfig) verbosity <- clean_int(verbosity) # Check IFW is FALSE if (!is.null(hyperparameters) && hyperparameters[["ifw"]]) { cli::cli_abort("IFW must be FALSE for proper calibration.") } # Calibration models if (verbosity > 0L) { msg( fmt("<>", col = col_calibrator, bold = TRUE), "Calibrating", x@algorithm, "resampled classification..." ) } calmods <- lapply( x@models, function(mod) { dat <- data.table( predicted_probabilities = mod@predicted_prob_test, true_labels = mod@y_test ) train( dat, algorithm = algorithm, hyperparameters = hyperparameters, outer_resampling_config = resampler_config, verbosity = train_verbosity ) } ) names(calmods) <- names(x@models) # CalibratedClassificationRes modres_cal <- CalibratedClassificationRes(x, calmods) # Outro ---- if (verbosity > 0L) { message() print(modres_cal) message() } if (verbosity > 0L) { msg(fmt("", col = col_calibrator, bold = TRUE), "Calibration done.") } modres_cal } # /rtemis::calibrate.ClassificationRes ================================================ FILE: R/check_data.R ================================================ # check_data.R # ::rtemis:: # 2022- EDG rtemis.org # %% check_data ---- #' Check Data #' #' @param x tabular data: Input to be checked. #' @param name Character: Name of dataset. #' @param get_duplicates Logical: If TRUE, check for duplicate cases. #' @param get_na_case_pct Logical: If TRUE, calculate percent of NA values per #' case. #' @param get_na_feature_pct Logical: If TRUE, calculate percent of NA values #' per feature. #' #' @return `CheckData` object. #' #' @author EDG #' @export #' #' @examples #' n <- 1000 #' x <- rnormmat(n, 50, return_df = TRUE) #' x$char1 <- sample(letters, n, TRUE) #' x$char2 <- sample(letters, n, TRUE) #' x$fct <- factor(sample(letters, n, TRUE)) #' x <- rbind(x, x[1, ]) #' x$const <- 99L #' x[sample(nrow(x), 20), 3] <- NA #' x[sample(nrow(x), 20), 10] <- NA #' x$fct[30:35] <- NA #' check_data(x) check_data <- function( x, name = NULL, get_duplicates = TRUE, get_na_case_pct = FALSE, get_na_feature_pct = FALSE ) { if (is.null(name)) { name <- deparse(substitute(x)) } # Check is tabular check_tabular(x) # Get class of x before converting to data.table object_class <- class(x)[1] # Convert to data.table x <- as.data.table(x) n_rows <- NROW(x) n_cols <- NCOL(x) # Data Types ---- classes <- sapply(x, \(v) class(v)[1]) counts <- table(classes) ## Numeric ---- n_numeric <- max0(counts["numeric"]) ## Integers ---- n_integer <- max0(counts["integer"]) ## Characters ---- n_character <- max0(counts["character"]) ## Factors ---- index_factor <- which(sapply(x, is.factor)) n_factor <- length(index_factor) index_ordered <- which(sapply(x, is.ordered)) n_ordered <- length(index_ordered) ## Dates ---- n_date <- sum( max0(counts["Date"]), max0(counts["IDate"]), max0(counts["POSIXct"]), max0(counts["POSIXlt"]) ) # Issues ---- ## Constants ---- index_constant <- which(sapply(x, is_constant)) n_constant <- length(index_constant) ## Duplicates ---- n_duplicates <- if (get_duplicates) { n_rows - uniqueN(x) } else { NA } ## NAs ---- cols_anyna <- which(sapply(x, anyNA)) n_cols_anyna <- length(cols_anyna) index_na <- which(is.na(x)) n_na <- length(index_na) ## Get percent of NA values per feature and per case if (n_cols_anyna > 0) { na_feature_pct <- if (get_na_feature_pct) { data.frame( Feature = names(cols_anyna), Pct_NA = sapply(seq_len(n_cols_anyna), \(i) { sum(is.na(x[[cols_anyna[i]]])) / n_rows }) ) } else { NULL } index_incomplete <- which(!complete.cases(x)) n_incomplete <- length(index_incomplete) na_case_pct <- if (get_na_case_pct) { data.frame( Case = index_incomplete, Pct_NA = sapply(seq_len(n_incomplete), \(i) { sum(is.na(x[index_incomplete[i], ])) / n_cols }) ) } else { NULL } # Get types of features with NA classes_na <- table(classes[cols_anyna]) # Get N of NAs in last column n_na_last_col <- sum(is.na(x[[n_cols]])) } else { n_na_last_col <- 0 classes_na <- NULL na_feature_pct <- if (get_na_feature_pct) { data.frame( Feature = character(0), Pct_NA = double(0) ) } else { NULL } na_case_pct <- if (get_na_case_pct) { data.frame( Case = integer(0), Pct_NA = double(0) ) } else { NULL } } # CheckData ---- CheckData( object_class = object_class, name = name, n_rows = n_rows, n_cols = n_cols, n_numeric = n_numeric, n_integer = n_integer, n_character = n_character, n_factor = n_factor, n_ordered = n_ordered, n_date = n_date, n_constant = n_constant, n_duplicates = n_duplicates, n_cols_anyna = n_cols_anyna, n_na = n_na, classes_na = classes_na, na_feature_pct = na_feature_pct, na_case_pct = na_case_pct, n_na_last_col = n_na_last_col ) } # /rtemis::check_data # %% max0 ---- #' Helper function to get max or 0 #' #' @param x Numeric vector #' #' @return Numeric: max(x, 0) #' #' @author EDG #' @keywords internal #' @noRd max0 <- function(x) max(x, 0, na.rm = TRUE) # %% to_html.CheckData ---- #' Generate `CheckData` object description in HTML #' #' @param x `CheckData` object #' @param name Character: Name of the data set #' @param css List: CSS styles #' #' @return `shiny.tag` object. #' #' @author EDG #' @keywords internal #' @noRd method(to_html, CheckData) <- function( x, name = NULL, css = list( font_family = "Helvetica", color = "#fff", background_color = "#242424" ) ) { n_rows <- x[["n_rows"]] n_cols <- x[["n_cols"]] n_numeric <- x[["n_numeric"]] n_integer <- x[["n_integer"]] n_character <- x[["n_character"]] n_factor <- x[["n_factor"]] n_ordered <- x[["n_ordered"]] n_date <- x[["n_date"]] n_constant <- x[["n_constant"]] n_duplicates <- x[["n_duplicates"]] n_cols_anyna <- x[["n_cols_anyna"]] n_na <- x[["n_na"]] classes_na <- x[["classes_na"]] ## Data Types ---- numeric <- HTML(paste( strong(n_numeric), "numeric", ngettext(n_numeric, "feature", "features") )) integer <- HTML(paste( strong(n_integer), "integer", ngettext(n_integer, "feature", "features") )) categorical <- HTML(paste0( strong(n_factor), ngettext(n_factor, " factor", " factors"), if (n_factor == 1) { paste(", which", ngettext(n_ordered, "is", "is not"), "ordered") } else if (n_factor > 1) { paste( ", of which", strong(n_ordered), ngettext(n_ordered, "is", "are"), "ordered" ) } )) # .col <- if (n_character > 0) html_orange else strong .col <- strong characters <- HTML(paste( .col(n_character), "character", ngettext(n_character, "feature", "features") )) dates <- HTML(paste( strong(n_date), "date", ngettext(n_date, "feature", "features") )) ## Issues ---- .col <- if (n_constant > 0) html_red else strong constants <- HTML(paste( .col(n_constant), "constant", ngettext(n_constant, "feature", "features") )) .col <- if (n_duplicates > 0) html_orange else strong duplicates <- HTML(paste( .col(n_duplicates), "duplicate", ngettext(n_duplicates, "case", "cases") )) .col <- if (n_cols_anyna > 0) html_orange else strong nas <- if (n_cols_anyna > 0) { HTML(paste( .col(n_cols_anyna), ngettext(n_cols_anyna, "feature includes", "features include"), "'NA' values; ", .col(n_na), "'NA'", ngettext(n_na, "value", "values"), "total", tags[["ul"]]( lapply(seq_along(classes_na), \(i) { tags[["li"]](HTML(paste( .col(classes_na[i]), tolower(names(classes_na)[i]) # ngettext(classes_na[i], "feature", "features") ))) }) ) )) } else { HTML(paste(strong("0"), "missing values")) } ## Recs ---- rec_constant <- if (n_constant > 0) { tags[["li"]](HTML(paste(html_orange( "Remove the constant", ngettext(n_constant, "feature", "features") )))) } else { NULL } rec_dups <- if (n_duplicates > 0) { tags[["li"]](HTML(paste(html_orange( "Consider removing the duplicate", ngettext(n_duplicates, "case", "cases") )))) } else { NULL } rec_na <- if (n_cols_anyna > 0) { list( if (isTRUE(classes_na["factor"] > 0)) { tags[["li"]](HTML(paste(html_orange( "Consider assigning factor 'NA' values to new 'missing' level" )))) }, tags[["li"]](HTML(paste(html_orange( "Consider imputing missing values or using algorithms that can handle missing values" )))) ) } else { NULL } recs <- if (sum(n_constant, n_duplicates, n_cols_anyna) == 0) { tags[["li"]](html_success("Everything looks good")) } else { list( rec_constant, rec_dups, rec_na ) } ## out ---- div( p( div( html_highlight(name), ": A", x[["class"]], "with", html_highlight(n_rows), ngettext(n_rows, "row", "rows"), "and", html_highlight(n_cols), ngettext(n_cols, "feature", "features"), class = "checkdata-header" ) ), p( span(strong("Data types"), class = "sidelined"), tags[["ul"]]( tags[["li"]](numeric), tags[["li"]](integer), tags[["li"]](categorical), tags[["li"]](characters), tags[["li"]](dates) ) ), # p Data Types p( span(strong("Issues"), class = "sidelined"), tags[["ul"]]( tags[["li"]](constants), tags[["li"]](duplicates), tags[["li"]](nas) ) ), # p Issues p( span(strong("Recommendations"), class = "sidelined"), tags[["ul"]]( recs ) ), # p Recommendations class = "checkData", style = paste0( "font-family:", css[["font_family"]], "; color:", css[["color"]], "; background-color:", css[["background_color"]], ";" ) ) } # /rtemis::to_html.CheckData ================================================ FILE: R/check_input_data.R ================================================ # check_supervised.R # ::rtemis:: # EDG rtemis.org # Notes: # Some algorithms do not work with variable names containing dots (SparkML) # %% check_factor_levels.class_data.frame ---- method(check_factor_levels, class_data.frame) <- function(x, y, z) { if (!is.null(y) || !is.null(z)) { index_factor <- which(sapply(x, is.factor)) x_levels <- lapply(x[, index_factor, drop = FALSE], levels) if (!is.null(y)) { y_levels <- lapply(y[, index_factor, drop = FALSE], levels) if ( !all(sapply(seq_along(x_levels), function(i) { identical(x_levels[[i]], y_levels[[i]]) })) ) { cli::cli_abort( "Training and validation set factor levels do not match." ) } } if (!is.null(z)) { z_levels <- lapply(z[, index_factor, drop = FALSE], levels) if ( !all(sapply(seq_along(x_levels), function(i) { identical(x_levels[[i]], z_levels[[i]]) })) ) { cli::cli_abort("Training and test set factor levels do not match.") } } } invisible() } # /method(check_factor_levels, class_data.frame) method(check_factor_levels, class_data.table) <- function(x, y, z) { if (!is.null(y) || !is.null(z)) { index_factor <- which(sapply(x, is.factor)) x_levels <- lapply(x[, .SD, .SDcols = index_factor], levels) if (!is.null(y)) { y_levels <- lapply(y[, .SD, .SDcols = index_factor], levels) if ( !all(sapply(seq_along(x_levels), function(i) { identical(x_levels[[i]], y_levels[[i]]) })) ) { cli::cli_abort( "Training and validation set factor levels do not match." ) } } if (!is.null(z)) { z_levels <- lapply(z[, .SD, .SDcols = index_factor], levels) if ( !all(sapply(seq_along(x_levels), function(i) { identical(x_levels[[i]], z_levels[[i]]) })) ) { cli::cli_abort("Training and test set factor levels do not match.") } } } invisible() } # /method(check_factor_levels, class_data.table) #' Check data ahead of supervised learning #' #' @param x Data frame: Training set features and outcome in the last column. #' @param dat_validation Data frame: Validation set features and outcome in the last column. #' @param dat_test Data frame: Test set features and outcome in the last column. #' @param allow_missing Logical: If TRUE, allow missing values in the data. #' @param verbosity Integer: Verbosity level. #' #' @return NULL, invisibly. Stops execution if checks fail. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' res <- resample(iris) #' iris_train <- iris[res[[1]], ] #' iris_test <- iris[-res[[1]], ] #' check_supervised(iris_train, dat_test = iris_test) check_supervised <- function( x, dat_validation = NULL, dat_test = NULL, allow_missing = TRUE, verbosity = 1L ) { # if (upsample && downsample) cli::cli_abort("Only one of upsample and downsample can be TRUE") if (verbosity > 0L) { msgstart("Checking data is ready for training...") } # Check types ---- check_inherits(x, "data.frame") if (!is.null(dat_validation)) { check_inherits(dat_validation, "data.frame") } if (!is.null(dat_test)) { check_inherits(dat_test, "data.frame") } # Check dimensions ---- ncols <- NCOL(x) # Since one column must be outcome, need min of 2 columns if (ncols < 2) { cli::cli_abort("Data must contain at least 1 feature and 1 outcome column.") } if (!is.null(dat_validation)) { if (NCOL(dat_validation) != ncols) { cli::cli_abort( "\nValidation set must contain same number of columns as training set." ) } } if (!is.null(dat_test)) { if (NCOL(dat_test) != ncols) { cli::cli_abort( "Test set must contain same number of columns as training set." ) } } # Missing values ---- if (anyNA(outcome(x))) { cli::cli_abort("Training set outcome cannot contain any missing values.") } if (!allow_missing && anyNA(x)) { cli::cli_abort("Data should not contain missing values.") } # Outcome class ---- outcome_class <- class(x[[ncols]]) if (!outcome_class %in% c("integer", "numeric", "factor")) { cli::cli_abort("Outcome must be integer, numeric, or factor.") } if (!is.null(dat_validation)) { if (class(dat_validation[[ncols]]) != outcome_class) { cli::cli_abort("Training and validation outcome must be of same class.") } } if (!is.null(dat_test)) { if (class(dat_test[[ncols]]) != outcome_class) { cli::cli_abort("Training and test outcome must be of same class.") } } # Factor levels ---- # Check that factors across training, validation, and test contain the same levels. check_factor_levels(x = x, y = dat_validation, z = dat_test) if (verbosity > 0L) { msgdone() } invisible() } # /rtemis::check_supervised # %% check_unsupervised_data ---- #' Check data ahead of unsupervised learning #' #' @param x Data frame: Features for unsupervised learning. #' @param allow_missing Logical: If TRUE, allow missing values in the data. Default is FALSE. #' #' @return NULL, invisibly. Stops execution if checks fail. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' check_unsupervised_data(iris[, -5]) check_unsupervised_data <- function(x, allow_missing = FALSE, verbosity = 1L) { if (verbosity > 0L) { msgstart("Checking unsupervised data...") } if (NCOL(x) < 2) { cli::cli_abort("Data must contain at least 2 columns.") } if (any(sapply(x, function(x) !is.numeric(x)))) { cli::cli_abort("All columns must be numeric.") } if (!allow_missing && anyNA(x)) { cli::cli_abort("Data should not contain missing values.") } if (verbosity > 0L) { msgdone() } invisible() } # /rtemis::check_unsupervised_data ================================================ FILE: R/cluster.R ================================================ # cluster.R # ::rtemis:: # 2025 EDG rtemis.org # %% cluster ---- #' Perform Clustering #' #' Perform clustering on the rows (usually cases) of a dataset. #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' @param x Matrix or data.frame: Data to cluster. Rows are cases to be clustered. #' @param algorithm Character: Clustering algorithm. #' @param config List: Algorithm-specific config. #' @param verbosity Integer: Verbosity level. #' #' @return `Clustering` object. #' #' @author EDG #' @export #' #' @examples #' iris_km <- cluster(exc(iris, "Species"), algorithm = "KMeans") cluster <- function( x, algorithm = "KMeans", config = NULL, verbosity = 1L ) { # Checks ---- if (is.null(config)) { config <- get_default_clusterparams(algorithm) } check_is_S7(config, ClusteringConfig) # Intro ---- start_time <- intro(verbosity = verbosity) # Data ---- if (verbosity > 0L) { summarize_unsupervised(x) } # Cluster ---- algorithm <- get_clust_name(algorithm) if (verbosity > 0L) { msg0(bold(paste0("Clustering with ", algorithm, "..."))) } clust <- cluster_(config = config, x = x, verbosity = verbosity) # Clusters ---- clusters <- do_call( fn = get_clustpredict_fn(algorithm), args = list(clust = clust) ) if (!is.null(config[["k"]])) { # For algorithms where k is specified in config k <- config[["k"]] } else { # For algorithms where k is not prescribed, but determined from the clustering result k <- length(unique(clusters)) if (verbosity > 0L) { msg0(paste0("Found ", highlight(k), " clusters.")) } } # Outro ---- outro(start_time, verbosity = verbosity) Clustering( algorithm = algorithm, clust = clust, k = k, clusters = clusters, config = config ) } # /rtemis::cluster ================================================ FILE: R/cluster_CMeans.R ================================================ # cluster_CMeans.R # ::rtemis:: # 2025- EDG rtemis.org # %% cluster_.CMeansConfig ---- #' C-means Clustering #' #' @keywords internal #' @noRd method(cluster_, CMeansConfig) <- function(config, x, verbosity = 1L) { # Dependencies ---- check_dependencies("e1071") # Data ---- check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity) # Cluster ---- if (verbosity > 0L) { msg("Clustering with", config@algorithm, "...") } clust <- e1071::cmeans( x = x, centers = config[["k"]], iter.max = config[["max_iter"]], verbose = verbosity > 0L, dist = config[["dist"]], method = config[["method"]], m = config[["m"]], rate.par = config[["rate_par"]], weights = config[["weights"]], control = config[["control"]] ) check_inherits(clust, "fclust") clust } # /rtemis::cluster_.CMeansConfig # %% clustpredict_CMeans ---- clustpredict_CMeans <- function(clust) { check_inherits(clust, "fclust") clust[["cluster"]] } # /rtemis::clustpredict_CMeans ================================================ FILE: R/cluster_DBSCAN.R ================================================ # cluster_DBSCAN.R # ::rtemis:: # 2025 EDG rtemis.org # %% cluster_.DBSCANConfig ---- #' Density-based spatial clustering of applications with noise (DBSCAN) #' #' @keywords internal #' @noRd method(cluster_, DBSCANConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, DBSCANConfig) # Dependencies ---- check_dependencies("dbscan") # Data ---- check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity) # Cluster ---- if (verbosity > 0L) { msg("Clustering with", config@algorithm, "...") } clust <- dbscan::dbscan( x = x, eps = config[["eps"]], minPts = config[["min_points"]], weights = config[["weights"]], borderPoints = config[["border_points"]], search = config[["search"]], bucketSize = config[["bucket_size"]], splitRule = config[["split_rule"]], approx = config[["approx"]] ) check_inherits(clust, "dbscan") clust } # /rtemis::cluster_.DBSCANConfig # %% clustpredict_DBSCAN ---- clustpredict_DBSCAN <- function(clust, dat_train = NULL, newdata = NULL) { check_inherits(clust, "dbscan") if (is.null(newdata)) { return(clust[["cluster"]]) } else { predict(clust, newdata = newdata, data = dat_train) } } # /rtemis::clustpredict_DBSCAN ================================================ FILE: R/cluster_flexclust.R ================================================ # cluster_KMeans.R # ::rtemis:: # 2025- EDG rtemis.org # %% cluster_.KMeansConfig ---- #' K-means Clustering #' #' @keywords internal #' @noRd method(cluster_, KMeansConfig) <- function(config, x, verbosity = 1L) { # Dependencies ---- check_dependencies("flexclust") # Data ---- check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity) # Cluster ---- if (verbosity > 0L) { msg("Clustering with", config@algorithm, "...") } clust <- flexclust::cclust( x = x, k = config[["k"]], dist = config[["dist"]], method = "kmeans" ) check_inherits(clust, "kcca") clust } # /rtemis::cluster_.KMeansConfig # %% cluster_.HardCLConfig ---- #' Hard Competitive Learning Clustering #' #' @keywords internal #' @noRd method(cluster_, HardCLConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, HardCLConfig) # Dependencies ---- check_dependencies("flexclust") # Data ---- check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity) # Cluster ---- if (verbosity > 0L) { msg("Clustering with", config@algorithm, "...") } clust <- flexclust::cclust( x = x, k = config[["k"]], dist = config[["dist"]], method = "hardcl" ) check_inherits(clust, "kcca") clust } # /rtemis::cluster_.HardCLConfig # %% cluster_.NeuralGasConfig ---- #' Neural Gas Clustering #' #' @keywords internal #' @noRd method(cluster_, NeuralGasConfig) <- function(config, x, verbosity = 1L) { # Dependencies ---- check_dependencies("flexclust") # Data ---- check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity) # Cluster ---- if (verbosity > 0L) { msg("Clustering with", config@algorithm, "...") } clust <- flexclust::cclust( x = x, k = config[["k"]], dist = config[["dist"]], method = "neuralgas" ) check_inherits(clust, "kcca") clust } # /rtemis::cluster_.NeuralGasConfig # %% clustpredict_{KMeans,HardCL,NeuralGas} ---- #' clustpredict methods for KMeans, HardCL, NeuralGas #' #' @author EDG #' @keywords internal #' @noRd clustpredict_KMeans <- clustpredict_HardCL <- clustpredict_NeuralGas <- function( clust, newdata = NULL ) { check_inherits(clust, "kcca") flexclust::clusters(clust, newdata = newdata) } # /rtemis::clustpredict_{KMeans,HardCL,NeuralGas} ================================================ FILE: R/data_xt_example.R ================================================ #' Example longitudinal dataset #' #' A small synthetic dataset demonstrating various participation patterns #' in longitudinal data, suitable for examples with \code{\link{xtdescribe}}. #' #' @format A data frame with 30 rows and 4 variables: #' \describe{ #' \item{patient_id}{Integer: Patient identifier (1-10).} #' \item{year}{Integer: Year of measurement (2020-2024).} #' \item{blood_pressure}{Numeric: Systolic blood pressure measurement.} #' \item{treatment}{Character: Treatment group ("A" or "B").} #' } #' #' @details #' This dataset includes 10 patients measured at up to 5 time points (years 2020-2024). #' The dataset demonstrates various participation patterns typical in longitudinal studies: #' \itemize{ #' \item Complete participation (all time points) #' \item Early dropout #' \item Late entry #' \item Intermittent participation #' \item Single time point participation #' } #' #' @examples #' data(xt_example) #' head(xt_example) #' summary(xt_example) #' #' @keywords datasets "xt_example" ================================================ FILE: R/ddSci.R ================================================ # ddSci.R # ::rtemis:: # 2015- EDG rtemis.org #' Format Numbers for Printing #' #' 2 Decimal places, otherwise scientific notation #' #' Numbers will be formatted to 2 decimal places, unless this results in 0.00 (e.g. if input was .0032), #' in which case they will be converted to scientific notation with 2 significant figures. #' `ddSci` will return `0.00` if the input is exactly zero. #' This function can be used to format numbers in plots, on the console, in logs, etc. #' #' @param x Vector of numbers #' @param decimal_places Integer: Return this many decimal places. #' @param hi Float: Threshold at or above which scientific notation is used. #' @param as_numeric Logical: If TRUE, convert to numeric before returning. #' This will not force all numbers to print 2 decimal places. For example: #' 1.2035 becomes "1.20" if `as_numeric = FALSE`, but 1.2 otherwise #' This can be helpful if you want to be able to use the output as numbers / not just for printing. #' @return Formatted number #' #' @author EDG #' @export #' #' @examples #' x <- .34876549 #' ddSci(x) #' # "0.35" #' x <- .00000000457823 #' ddSci(x) #' # "4.6e-09" ddSci <- function(x, decimal_places = 2, hi = 1e06, as_numeric = FALSE) { if (is.null(x)) { if (as_numeric) { return(NULL) } else { return("NULL") } } # Do not format factors, characters, or integers. if (is.factor(x) || is.character(x)) { return(as.character(x)) } if (is.integer(x)) { return(x) } x <- as.list(unlist(x)) x <- lapply(x, as.numeric) xf <- list() # Check for non-zero decimals # decs <- sum(unlist(x) %% 1, na.rm = TRUE) > 0 for (i in seq(x)) { if (is.na(x[[i]])) { xf[[i]] <- NA } else { # if (decs & x[[i]] == 0) { # x[[i]] is zero but others have decimals if (x[[i]] == 0) { # always give requested decimal places xf[[i]] <- format(0, nsmall = decimal_places) } else { if (abs(x[[i]]) >= hi) { xf[[i]] <- format( round(x[[i]], decimal_places), scientific = TRUE, digits = decimal_places, nsmall = decimal_places ) } else { # if (decs) { # xf[[i]] <- ifelse(round(x[[i]], 2) != 0, # format(round(x[[i]], decimal_places), nsmall = decimal_places), # format(x[[i]], scientific = TRUE, digits = 2) # ) # } else { # xf[[i]] <- as.character(x[[i]]) # } xf[[i]] <- ifelse( round(x[[i]], 2) != 0, format(round(x[[i]], decimal_places), nsmall = decimal_places), format(x[[i]], scientific = TRUE, digits = 2) ) } } } } xf <- as.character(xf) if (as_numeric) { xf <- as.numeric(xf) } xf } # /rtemis::ddSci ================================================ FILE: R/ddb.R ================================================ # ddb.R # ::rtemis:: # 2022- EDG rtemis.org #' Read CSV using DuckDB #' #' Lazy-read a CSV file, optionally: filter rows, remove duplicates, #' clean column names, convert character to factor, collect. #' #' @param filename Character: file name; either full path or just the file name, #' if `datadir` is also provided. #' @param datadir Character: Optional path if `filename` is not full path. #' @param sep Character: Field delimiter/separator. #' @param header Logical: If TRUE, first line will be read as column names. #' @param quotechar Character: Quote character. #' @param ignore_errors Logical: If TRUE, ignore parsing errors (sometimes it's #' either this or no data, so). #' @param make_unique Logical: If TRUE, keep only unique rows. #' @param select_columns Character vector: Column names to select. #' @param filter_column Character: Name of column to filter on, e.g. "ID". #' @param filter_vals Numeric or Character vector: Values in `filter_column` to keep. #' `filter_column` to keep. #' @param character2factor Logical: If TRUE, convert character columns to #' factors. #' @param collect Logical: If TRUE, collect data and return structure class #' as defined by `returnobj`. #' @param progress Logical: If TRUE, print progress (no indication this works). #' @param returnobj Character: "data.frame" or "data.table" object class to #' return. If "data.table", data.frame object returned from #' `DBI::dbGetQuery` is passed to `data.table::setDT`; will add to #' execution time if very large, but then that's when you need a data.table. #' @param data.table.key Character: If set, this corresponds to a column name in the #' dataset. This column will be set as key in the data.table output. #' @param clean_colnames Logical: If TRUE, clean colnames with #' [clean_colnames]. #' @param verbosity Integer: Verbosity level. #' #' @return data.frame or data.table if `collect` is TRUE, otherwise a character with the SQL query #' #' @author EDG #' @export #' #' @examples #' \dontrun{ #' # Requires local CSV file; replace with your own path #' ir <- ddb_data("/Data/massive_dataset.csv", #' filter_column = "ID", #' filter_vals = 8001:9999 #' ) #' } ddb_data <- function( filename, datadir = NULL, sep = ",", header = TRUE, quotechar = "", ignore_errors = TRUE, make_unique = TRUE, select_columns = NULL, filter_column = NULL, filter_vals = NULL, character2factor = FALSE, collect = TRUE, progress = TRUE, returnobj = c("data.table", "data.frame"), data.table.key = NULL, clean_colnames = TRUE, verbosity = 1L ) { # Intro ---- check_dependencies("DBI", "duckdb") returnobj <- match.arg(returnobj) if (!is.null(data.table.key)) { returnobj <- "data.table" } path <- if (is.null(datadir)) { normalizePath(filename) } else { file.path(normalizePath(datadir), filename) } check_files(path, verbosity = 0L) fileext <- tools::file_ext(path) out <- paste( bold(highlight("\u25B6")), ifelse(collect, "Reading", "Lazy-reading"), highlight(basename(path)) ) if (!is.null(filter_column)) { out <- paste( out, bold(highlight("\u29e8")), "filtering on", bold(filter_column) ) } start_time <- intro(out, verbosity = verbosity) distinct <- ifelse(make_unique, "DISTINCT ", NULL) select <- if (!is.null(select_columns)) { ls2sel(select_columns) } else { "*" } # SQL ---- sql <- if (fileext == "parquet") { paste0( "SELECT ", paste0(distinct, select), " FROM read_parquet('", path, "')" ) } else { paste0( "SELECT ", paste0(distinct, select), " FROM read_csv_auto('", path, "', sep='", sep, "', quote='", quotechar, "', header=", header, ", ignore_errors=", ignore_errors, ")" ) } sql <- if (!is.null(filter_column)) { vals <- if (is.numeric(filter_vals)) { paste0(filter_vals, collapse = ", ") } else { paste0("'", paste0(filter_vals, collapse = "', '"), "'") } paste( sql, "WHERE", filter_column, "in (", vals, ");" ) } else { paste0(sql, ";") } # Collect ---- if (collect) { conn <- DBI::dbConnect(duckdb::duckdb()) on.exit(DBI::dbDisconnect(conn, shutdown = TRUE)) # on.exit( # tryCatch(DBI::dbRollback(conn), error = function(e) { # })) if (progress) { DBI::dbExecute(conn, "PRAGMA enable_progress_bar;") } out <- DBI::dbGetQuery(conn, sql) if (clean_colnames) { names(out) <- clean_colnames(out) } if (returnobj == "data.table") { data.table::setDT(out) if (!is.null(data.table.key)) { data.table::setkeyv(out, data.table.key) } } if (character2factor) { out <- preprocess(out, setup_Preprocessor(character2factor = TRUE)) } } else { out <- sql } # Outro ---- outro(start_time, verbosity = verbosity) out } # /rtemis::ddb_data # output: '"alpha", "beta", "gamma"' ls2sel <- function(x) { paste0( '"', paste0(x, collapse = '", "'), '"' ) } #' Collect a lazy-read duckdb table #' #' Collect a table read with `ddb_data(x, collect = FALSE)` #' #' @param sql Character: DuckDB SQL query, usually output of #' [ddb_data] with `collect = FALSE` #' @param progress Logical: If TRUE, show progress bar #' @param returnobj Character: data.frame or data.table: class of object to return #' #' @return `data.frame` or `data.table`. #' #' @author EDG #' @export #' #' @examples #' \dontrun{ #' # Requires local CSV file; replace with your own path #' sql <- ddb_data("/Data/iris.csv", collect = FALSE) #' ir <- ddb_collect(sql) #' } ddb_collect <- function( sql, progress = TRUE, returnobj = c("data.frame", "data.table") ) { returnobj <- match.arg(returnobj) conn <- DBI::dbConnect(duckdb::duckdb()) on.exit(DBI::dbDisconnect(conn, shutdown = TRUE)) if (progress) { DBI::dbExecute(conn, "PRAGMA enable_progress_bar;") } out <- DBI::dbGetQuery(conn, sql) if (returnobj == "data.table") { setDT(out) } out } # /rtemis::ddb_collect ================================================ FILE: R/decomp.R ================================================ # decomp.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp ---- #' Perform Data Decomposition #' #' Perform linear or non-linear decomposition of numeric data. #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' @param x Matrix or data frame: Input data. #' @param algorithm Character: Decomposition algorithm. #' @param config DecompositionConfig: Algorithm-specific config. #' @param verbosity Integer: Verbosity level. #' #' @return `Decomposition` object. #' #' @author EDG #' @export #' #' @examples #' iris_pca <- decomp(exc(iris, "Species"), algorithm = "PCA") decomp <- function(x, algorithm = "ICA", config = NULL, verbosity = 1L) { # Checks ---- if (is.null(config)) { config <- get_default_decomparams(algorithm) } check_is_S7(config, DecompositionConfig) # Intro ---- start_time <- intro(verbosity = verbosity) # Data ---- if (verbosity > 0L) { summarize_unsupervised(x) } # Decompose ---- algorithm <- get_decom_name(algorithm) if (verbosity > 0L) { msg0("Decomposing with ", algorithm, "...") } # decomp_ -> list with elements 'decom' and 'transformed' decom <- decomp_(config = config, x = x, verbosity = verbosity) # Outro ---- outro(start_time, verbosity = verbosity) Decomposition( algorithm = algorithm, config = config, decom = decom[["decom"]], transformed = decom[["transformed"]] ) } # /rtemis::decomp ================================================ FILE: R/decomp_ICA.R ================================================ # decom_ICA.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.ICAConfig ---- #' ICA Decomposition #' #' @keywords internal #' @noRd method(decomp_, ICAConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_dependencies("fastICA") check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } decom <- fastICA::fastICA( X = as.matrix(x), n.comp = config[["k"]], alg.typ = config[["type"]], fun = config[["fun"]], alpha = config[["alpha"]], method = "C", row.norm = config[["row_norm"]], maxit = config[["maxit"]], tol = config[["tol"]], verbose = verbosity > 0L ) check_inherits(decom, "list") list(decom = decom, transformed = decom[["S"]]) } # /rtemis::decomp_.ICAConfig ================================================ FILE: R/decomp_Isomap.R ================================================ # decom_Isomap.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.IsomapConfig ---- #' Isomap Decomposition #' #' @keywords internal #' @noRd method(decomp_, IsomapConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_dependencies("vegan") check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } dst <- vegan::vegdist(x = x, method = config[["dist_method"]]) decom <- vegan::isomap( dist = dst, ndim = config[["k"]], k = config[["nsd"]], path = config[["path"]] ) check_inherits(decom, "isomap") list(decom = decom, transformed = decom[["points"]]) } # /rtemis::decomp_.IsomapConfig ================================================ FILE: R/decomp_NMF.R ================================================ # decom_NMF.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.NMFConfig ---- #' Non-negative Matrix Factorization (NMF) #' #' Decomposes a data matrix into non-negative factors using NMF. #' #' @param x A numeric matrix or data frame to be decomposed. #' @param config `NMFConfig` object. #' @param verbosity Integer: Verbosity level. #' #' @return A list containing the decomposition and transformed data. #' #' @author EDG #' @keywords internal #' @noRd method(decomp_, NMFConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, NMFConfig) check_dependencies("NMF") check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } xm <- as.matrix(x) args <- list(x = t(xm), rank = config[["k"]], nrun = config[["nrun"]]) decom <- do_call(NMF::nmf, args) check_inherits(decom, "NMFfit") basis <- NMF::basis(decom) transformed <- xm %*% basis colnames(transformed) <- paste0("NMF_", seq_len(NCOL(transformed))) list(decom = decom, transformed = transformed) } # /rtemis::decomp_.NMFConfig ================================================ FILE: R/decomp_PCA.R ================================================ # decom_PCA.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.PCAConfig ---- #' PCA Decomposition #' #' @keywords internal #' @noRd method(decomp_, PCAConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, PCAConfig) check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } decom <- prcomp( x = x, center = config[["center"]], scale. = config[["scale"]], tol = config[["tol"]], rank. = config[["k"]] ) check_inherits(decom, "prcomp") list(decom = decom, transformed = decom[["x"]]) } # /rtemis::decomp_.PCAConfig ================================================ FILE: R/decomp_UMAP.R ================================================ # decom_UMAP.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.UMAPConfig ---- #' UMAP Decomposition #' #' @param x A numeric matrix or data frame to be decomposed. #' @param config `UMAPConfig` object. #' @param verbosity Integer: Verbosity level. #' #' @return A list containing the decomposition and transformed data. #' #' @author EDG #' @keywords internal #' @noRd method(decomp_, UMAPConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, UMAPConfig) check_dependencies("uwot") check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } args <- c( list(X = x, n_components = config[["k"]], ret_model = TRUE), config@config ) args[["k"]] <- NULL decom <- do_call( uwot::umap, args, error_pattern_suggestion = list( "as_cholmod_sparse" = "Try installing packages 'Matrix' and 'irlba' from source." ) ) # ret_model = TRUE returns list check_inherits(decom, "list") list(decom = decom, transformed = decom[["embedding"]]) } # /rtemis::decomp_.UMAPConfig ================================================ FILE: R/decomp_tSNE.R ================================================ # decom_tSNE.R # ::rtemis:: # 2025 EDG rtemis.org # %% decomp_.tSNEConfig ---- #' tSNE Decomposition #' #' @keywords internal #' @noRd method(decomp_, tSNEConfig) <- function(config, x, verbosity = 1L) { # Checks ---- check_is_S7(config, tSNEConfig) check_dependencies("Rtsne") check_unsupervised_data(x = x, allow_missing = FALSE) # Decompose ---- if (verbosity > 0L) { msg("Decomposing with", config@algorithm, "...") } args <- c(list(X = x, dims = config[["k"]]), config@config) args[["k"]] <- NULL decom <- do_call( Rtsne::Rtsne, args, error_pattern_suggestion = list( "Remove duplicates" = "Remove duplicates using `preprocess()" ) ) check_inherits(decom, "Rtsne") list(decom = decom, transformed = decom[["Y"]]) } # /rtemis::decomp_.tSNEConfig ================================================ FILE: R/draw_3Dscatter.R ================================================ # draw_3Dscatter.R # ::rtemis:: # 2019- EDG rtemis.org #' Interactive 3D Scatter Plots #' #' Draw interactive 3D scatter plots using `plotly`. #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' Note that `draw_3Dscatter` uses the theme's `plot_bg` as `grid_col`. #' #' @param x Numeric, vector/data.frame/list: x-axis data. #' @param y Numeric, vector/data.frame/list: y-axis data. #' @param z Numeric, vector/data.frame/list: z-axis data. #' @param fit Character: Fit method. #' @param cluster Character: Clustering method. #' @param cluster_config List: Config for clustering. #' @param group Factor: Grouping variable. #' @param formula Formula: Formula for non-linear least squares fit. #' @param rsq Logical: If TRUE, print R-squared values in legend if `fit` is set. #' @param mode Character, vector: "markers", "lines", "markers+lines". #' @param order_on_x Logical: If TRUE, order `x` and `y` on `x`. #' @param main Character: Main title. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param zlab Character: z-axis label. #' @param alpha Numeric: Alpha for markers. #' @param bg Background color. #' @param plot_bg Plot background color. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param axes_square Logical: If TRUE, draw a square plot. #' @param group_names Character: Names for groups. #' @param font_size Numeric: Font size. #' @param marker_col Color for markers. #' @param marker_size Numeric: Marker size. #' @param fit_col Color for fit line. #' @param fit_alpha Numeric: Alpha for fit line. #' @param fit_lwd Numeric: Line width for fit line. #' @param tick_font_size Numeric: Tick font size. #' @param spike_col Spike lines color. #' @param legend Logical: If TRUE, draw legend. #' @param legend_xy Numeric: Position of legend. #' @param legend_xanchor Character: X anchor for legend. #' @param legend_yanchor Character: Y anchor for legend. #' @param legend_orientation Character: Orientation of legend. #' @param legend_col Color for legend text. #' @param legend_bg Color for legend background. #' @param legend_border_col Color for legend border. #' @param legend_borderwidth Numeric: Border width for legend. #' @param legend_group_gap Numeric: Gap between legend groups. #' @param margin Numeric, named list: Margins for top, bottom, left, right. #' @param fit_params `Hyperparameters` for fit. #' @param width Numeric: Width of plot. #' @param height Numeric: Height of plot. #' @param padding Numeric: Graph padding. #' @param displayModeBar Logical: If TRUE, display mode bar. #' @param modeBar_file_format Character: File format for mode bar. #' @param verbosity Integer: Verbosity level. #' @param filename Character: Filename to save plot. #' @param file_width Numeric: Width of saved file. #' @param file_height Numeric: Height of saved file. #' @param file_scale Numeric: Scale of saved file. #' #' @return A `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' draw_3Dscatter(iris, group = iris$Species, theme = theme_darkgraygrid()) draw_3Dscatter <- function( x, y = NULL, z = NULL, fit = NULL, cluster = NULL, cluster_config = NULL, group = NULL, formula = NULL, rsq = TRUE, mode = "markers", order_on_x = NULL, main = NULL, xlab = NULL, ylab = NULL, zlab = NULL, alpha = .8, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), axes_square = FALSE, group_names = NULL, font_size = 16, marker_col = NULL, marker_size = 8, fit_col = NULL, fit_alpha = .7, fit_lwd = 2.5, tick_font_size = 12, spike_col = NULL, legend = NULL, legend_xy = c(0, 1), legend_xanchor = "left", legend_yanchor = "auto", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, margin = list(t = 30, b = 0, l = 0, r = 0), fit_params = NULL, width = NULL, height = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", verbosity = 0L, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- if (is.null(y) && is.null(z) && NCOL(x) > 2) { .colnames <- labelify(colnames(x)) y <- x[, 2] z <- x[, 3] x <- x[, 1] if (is.null(xlab)) { xlab <- .colnames[1] } if (is.null(ylab)) { ylab <- .colnames[2] } if (is.null(zlab)) zlab <- .colnames[3] } if (!is.null(main)) { main <- paste0("", main, "") } if (!is.null(fit)) { if (fit == "none") fit <- NULL } # easier to work with shiny if (!is.null(fit)) { fit <- toupper(fit) } .mode <- mode .names <- group_names # order_on_x ---- if (is.null(order_on_x)) { order_on_x <- if (!is.null(fit) || any(grepl("lines", mode))) { TRUE } else { FALSE } } # Cluster ---- if (!is.null(cluster)) { group <- suppressWarnings( cluster( x = data.frame(x, y), algorithm = cluster, config = do_call( get_clust_setup_fn(cluster), cluster_config ) )@clusters ) group <- paste("Cluster", group) } # Data ---- # xlab, ylab ---- # The gsubs remove all text up to and including a "$" symbol if present if (is.null(xlab)) { if (is.list(x)) { xlab <- "x" } else { xlab <- labelify(gsub(".*\\$", "", deparse(substitute(x)))) } } if (!is.null(y) && is.null(ylab)) { if (is.list(y)) { ylab <- "y" } else { ylab <- labelify(gsub(".*\\$", "", deparse(substitute(y)))) } } if (!is.null(z) && is.null(zlab)) { if (is.list(z)) { zlab <- "z" } else { zlab <- labelify(gsub(".*\\$", "", deparse(substitute(z)))) } } # '- Group ---- if (!is.null(group)) { group <- as.factor(group) x <- split(x, group, drop = TRUE) y <- split(y, group, drop = TRUE) z <- split(z, group, drop = TRUE) if (is.null(group_names)) { group_names <- levels(droplevels(group)) } names(x) <- names(y) <- names(z) <- .names <- group_names } # Try to get names from list or data frame inputs if (is.list(y) || NCOL(y) > 1) { if (is.null(.names) && !is.null(names(y))) .names <- names(y) } if (is.list(x) || NCOL(x) > 1) { if (is.null(.names) && !is.null(names(x))) .names <- names(x) } if (is.list(z) || NCOL(z) > 1) { if (is.null(.names) && !is.null(names(z))) .names <- names(z) } # Convert to lists ---- x <- if (!is.list(x)) as.list(as.data.frame(x)) else x y <- if (!is.null(y) && !is.list(y)) as.list(as.data.frame(y)) else y z <- if (!is.null(z) && !is.list(z)) as.list(as.data.frame(z)) else z if (length(x) == 1 && length(y) > 1) { x <- rep(x, length(y)) .names <- names(y) } if (length(y) == 1 && length(x) > 1) { y <- rep(y, length(x)) .names <- names(x) } if (length(z) == 1 && length(x) > 1) { z <- rep(z, length(x)) .names <- names(x) } n_groups <- length(x) # legend <- if (is.null(legend) & n_groups == 1 & is.null(fit)) FALSE else TRUE legend <- if (is.null(legend) && n_groups == 1) FALSE else TRUE if (length(.mode) < n_groups) { .mode <- c(.mode, rep(tail(.mode)[1], n_groups - length(.mode))) } # if (is.null(legend)) legend <- n_groups > 1 if (is.null(.names)) { if (n_groups > 1) { .names <- paste("Group", seq_len(n_groups)) } else { .names <- if (!is.null(fit)) fit else NULL .names <- NULL } } # Reorder ---- if (order_on_x) { index <- lapply(x, order) x <- lapply(seq(x), function(i) x[[i]][index[[i]]]) y <- lapply(seq(x), function(i) y[[i]][index[[i]]]) z <- lapply(seq(x), function(i) z[[i]][index[[i]]]) } # s.e. fit ---- se_fit <- FALSE # if (se_fit) { # if (!fit %in% c("GLM", "LM", "LOESS", "GAM", "NW")) { # warning(paste("Standard error of the fit not available for", fit, "- try LM, LOESS, GAM, or NW")) # se_fit <- FALSE # } # } # Colors ---- col <- recycle(palette, seq_len(n_groups)) # Convert inputs to RGB spike_col <- plotly::toRGB(spike_col) # Theme ---- axes_visible <- FALSE axes_mirrored <- FALSE check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) if (!theme[["axes_visible"]]) { tick_col <- labs_col <- "transparent" } # marker_col, se_col ---- if (is.null(marker_col)) { marker_col <- if (!is.null(fit) && n_groups == 1) { as.list(rep(theme[["fg"]], n_groups)) } else { col } } if (!is.null(fit)) { if (is.null(fit_col)) fit_col <- col } # Derived if (is.null(legend_col)) { legend_col <- labs_col } # Size ---- if (axes_square) { width <- height <- min(dev.size("px")) - 10 } # fitted & se_fit ---- # If plotting se bands, need to include (fitted +/- se.times * se) in the axis limits if (se_fit) { se <- list() } else { se <- NULL } if (!is.null(fit)) { # learner <- get_train_fn(fit) fitted <- list() fitted_text <- character() for (i in seq_len(n_groups)) { df1 <- data.frame(x[[i]], y[[i]], z[[i]]) mod <- train( df1, algorithm = fit, hyperparameters = fit_params, verbosity = verbosity ) fitted[[i]] <- fitted(mod) if (se_fit) { se[[i]] <- se(mod) } fitted_text[i] <- fit if (rsq) { fitted_text[i] <- paste0( fitted_text[i], if (n_groups == 1) " (" else " ", "R2 = ", ddSci(mod@metrics_training[["Rsq"]]), if (n_groups == 1) ")" ) } } } # plotly ---- plt <- plotly::plot_ly( width = width, height = height ) for (i in seq_len(n_groups)) { # '- { Scatter } ---- marker <- if (grepl("markers", .mode[i])) { list( color = plotly::toRGB(marker_col[[i]], alpha = alpha), size = marker_size ) } else { NULL } plt <- plotly::add_trace( plt, x = x[[i]], y = y[[i]], z = z[[i]], type = "scatter3d", mode = .mode[i], # fillcolor = plotly::toRGB(col[[i]], alpha), # name = if (n_groups > 1) .names[i] else "Raw", name = .names[i], # text = .text[[i]], # hoverinfo = "text", # marker = if (grepl("markers", .mode[i])) list(color = plotly::toRGB(marker_col[[i]], alpha = alpha)) else NULL, marker = marker, line = if (grepl("lines", .mode[i])) { list(color = plotly::toRGB(marker_col[[i]], alpha = alpha)) } else { NULL }, legendgroup = if (n_groups > 1) .names[i] else "Raw", showlegend = legend ) # if (se_fit) { # # '- { SE band } ---- # plt <- plotly::add_trace(plt, # x = x[[i]], # y = fitted[[i]] + se.times * se[[i]], # type = "scatter", # mode = "lines", # line = list(color = "transparent"), # legendgroup = .names[i], # showlegend = FALSE, # hoverinfo = "none", # inherit = FALSE) # plt <- plotly::add_trace(plt, x = x[[i]], # y = fitted[[i]] - se.times * se[[i]], # type = "scatter", # mode = "lines", # fill = "tonexty", # fillcolor = plotly::toRGB(se_col[[i]], alpha = se.alpha), # line = list(color = "transparent"), # # name = shade.name, # legendgroup = .names[i], # showlegend = FALSE, # hoverinfo = "none", # inherit = FALSE) # } if (!is.null(fit)) { # '- { Fitted mesh } ---- plt <- plotly::add_trace( plt, x = x[[i]], y = y[[i]], z = fitted[[i]], type = "mesh3d", opacity = fit_alpha, name = fitted_text[i], # legendgroup = .names[i], # showlegend = if (legend & n_groups == 1) TRUE else FALSE, inherit = FALSE, showscale = FALSE, intensity = 1, colorscale = list( c(0, plotly::toRGB(fit_col[[i]])), c(1, plotly::toRGB(fit_col[[i]])) ) ) } } # Layout ---- # '- layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = tick_font_size, color = theme[["tick_labels_col"]] ) .legend <- list( x = legend_xy[1], xanchor = legend_xanchor, y = legend_xy[2], yanchor = legend_yanchor, font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation, bgcolor = plotly::toRGB(legend_bg), bordercolor = plotly::toRGB(legend_border_col), borderwidth = legend_borderwidth, tracegroupgap = legend_group_gap ) plt <- plotly::layout( plt, scene = list( yaxis = list( title = ylab, showline = axes_visible, mirror = axes_mirrored, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE, spikecolor = spike_col ), xaxis = list( title = xlab, showline = axes_visible, mirror = axes_mirrored, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE, spikecolor = spike_col ), zaxis = list( title = zlab, showline = axes_visible, mirror = axes_mirrored, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE, spikecolor = spike_col ) ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ) ), # titlefont = list(), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, showlegend = legend, legend = .legend ) # Padding plt[["sizingPolicy"]][["padding"]] <- padding # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { plotly::save_image( plt, file = file.path(filename), width = file_width, height = file_height, scale = file_scale ) } plt } # /rtemis::draw_3Dscatter ================================================ FILE: R/draw_bar.R ================================================ # draw_bar.R # ::rtemis:: # 2019-22 EDG rtemis.org #' Interactive Barplots #' #' Draw interactive barplots using `plotly` #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' @param x vector (possibly named), matrix, or data.frame: If matrix or #' data.frame, rows are groups (can be 1 row), columns are features #' @param main Character: Main plot title. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param alpha Float (0, 1]: Transparency for bar colors. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param barmode Character: Type of bar plot to make: "group", "relative", #' "stack", "overlay". Default = "group". Use #' "relative" for stacked bars, wich handles negative values correctly, #' unlike "stack", as of writing. #' @param group_names Character, vector, length = NROW(x): Group names. #' Default = NULL, which uses `rownames(x)` #' @param order_by_val Logical: If TRUE, order bars by increasing value. #' Only use for single group data. #' @param ylim Float, vector, length 2: y-axis limits. #' @param hovernames Character, vector: Optional character vector to show on #' hover over each bar. #' @param feature_names Character, vector, length = NCOL(x): Feature names. #' Default = NULL, which uses `colnames(x)` #' @param font_size Float: Font size for all labels. #' @param legend Logical: If TRUE, draw legend. Default = NULL, and will be #' turned on if there is more than one feature present #' @param legend_col Color: Legend text color. Default = NULL, determined by #' theme #' @param hline Float: If defined, draw a horizontal line at this y value. #' @param hline_col Color for `hline`. #' @param hline_width Float: Width for `hline`. #' @param hline_dash Character: Type of line to draw: "solid", "dot", "dash", #' "longdash", "dashdot", #' or "longdashdot" #' @param hline_annotate Character: Text of horizontal line annotation if #' `hline` is set #' @param hline_annotation_x Numeric: x position to place annotation with paper #' as reference. 0: to the left of the plot area; 1: to the right of the plot area #' @param margin Named list: plot margins. #' @param padding Integer: N pixels to pad plot. #' @param horizontal Logical: If TRUE, plot bars horizontally #' @param annotate Logical: If TRUE, annotate stacked bars #' @param annotate_col Color for annotations #' @param legend_xy Numeric, vector, length 2: x and y for plotly's legend #' @param legend_orientation "v" or "h" for vertical or horizontal #' @param legend_xanchor Character: Legend's x anchor: "left", "center", #' "right", "auto" #' @param legend_yanchor Character: Legend's y anchor: "top", "middle", #' "bottom", "auto" #' @param automargin_x Logical: If TRUE, automatically set x-axis margins #' @param automargin_y Logical: If TRUE, automatically set y-axis margins #' @param displayModeBar Logical: If TRUE, show plotly's modebar #' @param modeBar_file_format Character: "svg", "png", "jpeg", "pdf" / any #' output file type supported by plotly and your system # @param print_plot Logical: If TRUE, print plot, otherwise return it invisibly #' @param filename Character: Path to file to save static plot. #' @param file_width Integer: File width in pixels for when `filename` is #' set. #' @param file_height Integer: File height in pixels for when `filename` #' is set. #' @param file_scale Numeric: If saving to file, scale plot by this number #' @param verbosity Integer: Verbosity level. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' draw_bar(VADeaths, legend_xy = c(0, 1)) #' draw_bar(VADeaths, legend_xy = c(1, 1), legend_xanchor = "left") #' # simple individual bars #' a <- c(4, 7, 2) #' draw_bar(a) #' # if input is a data.frame, each row is a group and each column is a feature #' b <- data.frame(x = c(3, 5, 7), y = c(2, 1, 8), z = c(4, 5, 2)) #' rownames(b) <- c("Jen", "Ben", "Ren") #' draw_bar(b) #' # stacked #' draw_bar(b, barmode = "stack") draw_bar <- function( x, main = NULL, xlab = NULL, ylab = NULL, alpha = 1, horizontal = FALSE, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), barmode = c("group", "relative", "stack", "overlay"), group_names = NULL, order_by_val = FALSE, ylim = NULL, hovernames = NULL, feature_names = NULL, font_size = 16, annotate = FALSE, annotate_col = theme[["labs_col"]], legend = NULL, legend_col = NULL, legend_xy = c(1, 1), legend_orientation = "v", legend_xanchor = "left", legend_yanchor = "auto", hline = NULL, hline_col = NULL, hline_width = 1, hline_dash = "solid", hline_annotate = NULL, hline_annotation_x = 1, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), automargin_x = TRUE, automargin_y = TRUE, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 0L ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- barmode <- match.arg(barmode) if (!is.null(main)) { main <- paste0("", main, "") } dat <- as.data.frame(x) if (NROW(dat) == 1 && barmode != "stack") { dat <- as.data.frame(t(dat)) } # Order by val ---- if (order_by_val) { if (NCOL(dat) > 1) { order_ <- order(sapply(dat, mean, na.rm = TRUE)) dat <- dat[, order_] } else { order_ <- order(dat[[1]]) dat <- dat[order_, , drop = FALSE] } if (!is.null(group_names)) { group_names <- group_names[order_] } if (!is.null(hovernames)) hovernames <- hovernames[order_] } # Group names ---- group_names_ <- group_names if (is.null(group_names)) { if (!is.null(rownames(dat))) group_names_ <- rownames(dat) } if (verbosity > 0L) { msg("group_names_:", group_names_, "\n") } # Feature names ---- feature_names_ <- feature_names if (is.null(feature_names_)) { if (!is.null(colnames(dat))) { feature_names_ <- labelify(colnames(dat)) } else { feature_names_ <- paste0("Feature", seq_len(NCOL(dat))) } } if (verbosity > 0L) { msg("feature_names_:", feature_names_, "\n") } if (is.null(legend)) { legend <- length(feature_names_) > 1 } # Colors ---- p <- NCOL(dat) col <- recycle(palette, seq(p))[seq(p)] # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) # Derived if (is.null(legend_col)) { legend_col <- labs_col } if (!is.null(hovernames)) { hovernames <- matrix(hovernames) if (NCOL(hovernames) == 1 && p > 1) { hovernames <- matrix(rep(hovernames, p), ncol = p) } } # plot_ly ---- group_names_ <- factor(group_names_, levels = group_names_) plt <- plotly::plot_ly( x = if (horizontal) dat[[1]] else group_names_, y = if (horizontal) group_names_ else dat[[1]], type = "bar", name = feature_names_[1], text = hovernames[, 1], marker = list(color = plotly::toRGB(if (p > 1) col[1] else col, alpha)), showlegend = legend ) if (p > 1) { for (i in seq_len(p)[-1]) { plt <- plotly::add_trace( plt, x = if (horizontal) dat[[i]] else group_names_, y = if (horizontal) group_names_ else dat[[i]], name = feature_names_[i], text = hovernames[, i], marker = list(color = plotly::toRGB(col[i], alpha)) ) } } if (annotate) { if (barmode != "stack") { warning("Set barmode to 'stack' to allow annotation") } else { if (horizontal) { for (i in seq_len(ncol(dat))) { plt <- plt |> plotly::add_annotations( xref = "x", yref = "y", x = rowSums(dat[, seq_len(i - 1), drop = FALSE]) + dat[, i] / 2, y = seq_len(nrow(dat)) - 1, text = paste(dat[, i]), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } else { for (i in seq_len(ncol(dat))) { plt <- plt |> plotly::add_annotations( xref = "x", yref = "y", x = seq_len(nrow(dat)) - 1, y = rowSums(dat[, seq_len(i - 1), drop = FALSE]) + dat[, i] / 2, text = paste(signif(dat[, i], 2)), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } } } # Layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = font_size, color = theme[["tick_labels_col"]] ) legend_ <- list( x = legend_xy[1], y = legend_xy[2], xanchor = legend_xanchor, yanchor = legend_yanchor, bgcolor = "#ffffff00", font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation ) plt <- plotly::layout( plt, yaxis = list( title = ylab, # showline = axes_visible, # mirror = axes_mirrored, range = ylim, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE, automargin = automargin_y ), xaxis = list( title = xlab, # showline = axes_visible, # mirror = axes_mirrored, titlefont = f, showgrid = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, automargin = automargin_x ), barmode = barmode, # group works without actual groups too title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, # showlegend = legend, legend = legend_ ) # hline ---- if (!is.null(hline)) { if (is.null(hline_col)) { hline_col <- theme[["fg"]] } hline_col <- recycle(hline_col, hline) hline_width <- recycle(hline_width, hline) hline_dash <- recycle(hline_dash, hline) hlinel <- lapply(seq_along(hline), function(i) { list( type = "line", x0 = 0, x1 = 1, xref = "paper", y0 = hline[i], y1 = hline[i], line = list( color = hline_col[i], width = hline_width[i], dash = hline_dash[i] ) ) }) plt <- plotly::layout(plt, shapes = hlinel) # Annotate horizontal lines on the right border of the plot if (!is.null(hline_annotate)) { plt <- plt |> plotly::add_annotations( xref = "paper", yref = "y", xanchor = "right", yanchor = "bottom", x = hline_annotation_x, y = hline, text = hline_annotate, font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } # Padding plt[["sizingPolicy"]][["padding"]] <- padding # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_bar.R ================================================ FILE: R/draw_box.R ================================================ # draw_box.R # ::rtemis:: # EDG rtemis.org #' Interactive Boxplots & Violin plots #' #' Draw interactive boxplots or violin plots using \pkg{plotly} #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' For multiple box plots, the recommendation is: #' - `x=dat[, columnindex]` for multiple variables of a data.frame #' - `x=list(a=..., b=..., etc.)` for multiple variables of potentially #' different length #' - `x=split(var, group)` for one variable with multiple groups: group names #' appear below boxplots #' - `x=dat[, columnindex], group = factor` for grouping multiple variables: #' group names appear in legend #' #' If `orientation == "h"`, `xlab` is applied to y-axis and vice versa. #' Similarly, `x.axist.type` applies to y-axis - this defaults to #' "category" and would not normally need changing. #' #' @param x Vector or List of vectors: Input #' @param time Date or date-time vector #' @param time_bin Character: "year", "quarter", "month", or "day". Period to #' bin by #' @param type Character: "box" or "violin" #' @param group Factor to group by #' @param x_transform Character: "none", "scale", or "minmax" to use raw values, #' scaled and centered values or min-max normalized to 0-1, respectively. #' Transform is applied to each variable before grouping, so that groups are #' comparable #' @param main Character: Plot title. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param alpha Float (0, 1]: Transparency for box colors. #' @param bg Color: Background color. #' @param plot_bg Color: Background color for plot area. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param quartilemethod Character: "linear", "exclusive", "inclusive" #' @param xlim Numeric vector: x-axis limits #' @param ylim Numeric vector: y-axis limits #' @param boxpoints Character or FALSE: "all", "suspectedoutliers", "outliers" #' See #' @param xnames Character, vector, length = NROW(x): x-axis names. Default = NULL, which #' tries to set names automatically. #' @param group_lines Logical: If TRUE, add separating lines between groups of #' boxplots #' @param group_lines_dash Character: "solid", "dot", "dash", "longdash", #' "dashdot", or "longdashdot" #' @param group_lines_col Color for `group_lines` #' @param group_lines_alpha Numeric: transparency for `group_lines_col` #' @param order_by_fn Function: If defined, order boxes by increasing value of #' this function (e.g. median). #' @param font_size Float: Font size for all labels. #' @param ylab_standoff Numeric: Standoff for y-axis label #' @param legend Logical: If TRUE, draw legend. #' @param legend_col Color: Legend text color. Default = NULL, determined by #' the theme. #' @param legend_xy Float, vector, length 2: Relative x, y position for legend. #' @param xaxis_type Character: "linear", "log", "date", "category", #' "multicategory" #' @param cataxis_tickangle Numeric: Angle for categorical axis tick labels #' @param margin Named list: plot margins. #' @param violin_box Logical: If TRUE and type is "violin" show box within #' violin plot #' @param orientation Character: "v" or "h" for vertical, horizontal #' @param annotate_n Logical: If TRUE, annotate with N in each box #' @param annotate_n_y Numeric: y position for `annotate_n` #' @param annotate_mean Logical: If TRUE, annotate with mean of each box #' @param annotate_meansd Logical: If TRUE, annotate with mean (SD) of each box #' @param annotate_meansd_y Numeric: y position for `annotate_meansd` #' @param annotate_col Color for annotations #' @param labelify Logical: If TRUE, [labelify] x names #' @param legend_orientation "v" or "h" for vertical, horizontal #' @param legend_xanchor Character: Legend's x anchor: "left", "center", #' "right", "auto" #' @param legend_yanchor Character: Legend's y anchor: "top", "middle", #' "bottom", "auto" #' @param automargin_x Logical: If TRUE, automatically set x-axis margins #' @param automargin_y Logical: If TRUE, automatically set y-axis margins #' @param boxgroupgap Numeric: Sets the gap (in plot fraction) between boxes #' of the same location coordinate #' @param hovertext Character vector: Text to show on hover for each data point #' @param show_n Logical: If TRUE, show N in each box #' @param pvals Numeric vector: Precomputed p-values. Should correspond to each box. #' Bypasses `htest` and `htest_compare`. Requires `group` to be set #' @param htest Character: e.g. "t.test", "wilcox.test" to compare each box to #' the *first* box. If grouped, compare within each group to the first box. #' If p-value of test is less than `htest.thresh`, add asterisk above/ #' to the side of each box #' @param htest_compare Integer: 0: Compare all distributions against the first one; #' 2: Compare every second box to the one before it. Requires `group` to #' be set #' @param htest_y Numeric: y coordinate for `htest` annotation #' @param htest_annotate Logical: if TRUE, include htest annotation #' @param htest_annotate_x Numeric: x-axis paper coordinate for htest annotation #' @param htest_annotate_y Numeric: y-axis paper coordinate for htest annotation #' @param htest_star_col Color for htest annotation stars #' @param htest_bracket_col Color for htest annotation brackets #' @param starbracket_pad Numeric: Padding for htest annotation brackets #' @param use_plotly_group If TRUE, use plotly's `group` arg to group #' boxes. #' @param width Numeric: Force plot size to this width. Default = NULL, i.e. fill #' available space #' @param height Numeric: Force plot size to this height. Default = NULL, i.e. fill #' available space #' @param displayModeBar Logical: If TRUE, show plotly's modebar #' @param filename Character: Path to file to save static plot. #' @param modeBar_file_format Character: "svg", "png", "jpeg", "pdf" #' @param file_width Integer: File width in pixels for when `filename` is #' set. #' @param file_height Integer: File height in pixels for when `filename` #' is set. #' @param file_scale Numeric: If saving to file, scale plot by this number #' @param mathjax Optional Character \{"local", "cdn"\}: Whether to use local or CDN version of #' MathJax for rendering mathematical annotations. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # A.1 Box plot of 4 variables #' draw_box(iris[, 1:4]) #' # A.2 Grouped Box plot #' draw_box(iris[, 1:4], group = iris[["Species"]]) #' draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE) #' # B. Boxplot binned by time periods #' # Synthetic data with an instantenous shift in distributions #' set.seed(2021) #' dat1 <- data.frame(alpha = rnorm(200, 0), beta = rnorm(200, 2), gamma = rnorm(200, 3)) #' dat2 <- data.frame(alpha = rnorm(200, 5), beta = rnorm(200, 8), gamma = rnorm(200, -3)) #' x <- rbind(dat1, dat2) #' startDate <- as.Date("2019-12-04") #' endDate <- as.Date("2021-03-31") #' time <- seq(startDate, endDate, length.out = 400) #' draw_box(x[, 1], time, "year", ylab = "alpha") #' draw_box(x, time, "year", legend.xy = c(0, 1)) #' draw_box(x, time, "quarter", legend.xy = c(0, 1)) #' draw_box(x, time, "month", #' legend.orientation = "h", #' legend.xy = c(0, 1), #' legend.yanchor = "bottom" #' ) #' # (Note how the boxplots widen when the period includes data from both dat1 and dat2) draw_box <- function( x, time = NULL, time_bin = c("year", "quarter", "month", "day"), type = c("box", "violin"), group = NULL, x_transform = c("none", "scale", "minmax"), main = NULL, xlab = "", ylab = NULL, alpha = .6, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), boxpoints = "outliers", quartilemethod = "linear", xlim = NULL, ylim = NULL, violin_box = TRUE, orientation = "v", annotate_n = FALSE, annotate_n_y = 1, annotate_mean = FALSE, # forr A.2.b. annotate_meansd = FALSE, annotate_meansd_y = 1, annotate_col = theme[["labs_col"]], xnames = NULL, group_lines = FALSE, group_lines_dash = "dot", group_lines_col = NULL, group_lines_alpha = .5, labelify = TRUE, order_by_fn = NULL, font_size = 16, # Axes ylab_standoff = 18, legend = NULL, legend_col = NULL, legend_xy = NULL, legend_orientation = "v", legend_xanchor = "auto", legend_yanchor = "auto", xaxis_type = "category", cataxis_tickangle = "auto", # margin = list(t = 35, pad = 0), margin = list(b = 65, l = 65, t = 50, r = 12, pad = 0), automargin_x = TRUE, automargin_y = TRUE, # boxgap = 0, #1/nvars, #.12, boxgroupgap = NULL, hovertext = NULL, show_n = FALSE, # boxmode = NULL, pvals = NULL, htest = "none", htest_compare = 0, # htest.thresh = .05, htest_y = NULL, htest_annotate = TRUE, htest_annotate_x = 0, htest_annotate_y = -.065, htest_star_col = theme[["labs_col"]], htest_bracket_col = theme[["labs_col"]], starbracket_pad = c(.04, .05, .09), use_plotly_group = FALSE, width = NULL, height = NULL, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, mathjax = NULL ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- type <- match.arg(type) x_transform <- match.arg(x_transform) # Convert vector or data.frame/data.table/matrix to list if (!is.list(x)) { # x is vector if (is.numeric(x)) { .names <- deparse(substitute(x)) x <- list(x) names(x) <- .names } else { # x is data.frame or matrix .names <- colnames(x) x <- lapply(seq_len(NCOL(x)), function(i) x[, i]) names(x) <- .names } } nvars <- length(x) if (nvars > 1 && !is.null(group) && !is.null(time)) { cli::cli_abort("Better use subplot for each variable") } horizontal <- orientation == "h" if (x_transform != "none") { if (x_transform == "scale") { x <- lapply(x, \(z) as.numeric(scale(z))) } else if (x_transform == "minmax") { x <- lapply(x, drange) } else { cli::cli_abort("Unsupported x_transform specified") } } # Order by fn ---- if (!is.null(order_by_fn)) { if (is.null(time)) { if (is.list(x)) { .order <- order(sapply(x, order_by_fn, na.rm = TRUE)) if (is.data.frame(x)) { x <- x[, .order] } else { x <- x[names(x)[.order]] } } if (!is.null(xnames)) { xnames <- xnames[.order] } } else { cli::cli_abort("Cannot use `order_by_fn` with `time`") } } # Remove non-numeric vectors # which.nonnum <- which(sapply(x, function(i) !is.numeric(i))) # if (length(which.nonnum) > 0) x[[which.nonnum]] <- NULL if (!is.null(group)) { group <- factor(group) } n_groups <- if (is.null(group)) { length(x) } else { nlevels(group) } if (n_groups == 1) { htest <- "none" } .xnames <- xnames if (is.null(.xnames)) { .xnames <- names(x) if (is.null(.xnames)) { .xnames <- paste0("Feature", seq(n_groups)) } if (labelify) .xnames <- labelify(.xnames) } # Colors ---- col <- recycle(palette, seq(n_groups)) # Theme ---- check_is_S7(theme, Theme) if (theme[["main_font"]] == 2) { main <- paste0("", main, "") } bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) # Derived if (is.null(legend_col)) { legend_col <- labs_col } # Plot ---- if (is.null(time)) { if (is.null(group)) { # A.1 Single and multiple boxplots ---- if (is.null(legend)) { legend <- FALSE } # Args for first trace .args <- if (horizontal) { list(x = x[[1]], y = NULL) } else { list(x = NULL, y = x[[1]]) } .args <- c( .args, list( type = type, # name = .xnames[1], name = if (show_n) { paste0(.xnames[1], " (N=", length(x[[1]]), ")") } else { .xnames[1] }, line = list(color = plotly::toRGB(col[1])), fillcolor = plotly::toRGB(col[1], alpha), marker = list(color = plotly::toRGB(col[1], alpha)), showlegend = legend # width = width ) ) if (!is.null(hovertext) && n_groups == 1) { hovertext <- list(hovertext) } if (type == "box") { .args <- c( .args, list( quartilemethod = quartilemethod, boxpoints = boxpoints ) ) if (!is.null(hovertext)) .args[["text"]] <- hovertext[[1]] } if (type == "violin") { .args[["box"]] <- list(visible = violin_box) } plt <- do.call(plotly::plot_ly, .args) if (n_groups > 1) { for (i in seq_len(n_groups)[-1]) { plt <- plotly::add_trace( plt, x = if (horizontal) x[[i]] else NULL, y = if (horizontal) NULL else x[[i]], # name = .xnames[i], name = if (show_n) { paste0(.xnames[i], " (N=", length(x[[i]]), ")") } else { .xnames[i] }, line = list(color = plotly::toRGB(col[i])), # box borders fillcolor = plotly::toRGB(col[i], alpha), # box fill marker = list(color = plotly::toRGB(col[i], alpha)), # points text = if (!is.null(hovertext)) hovertext[[i]] else NULL ) } } # '-Annotate N ---- if (annotate_n) { Nperbox <- Filter( function(i) i > 0, sapply(x, function(j) length(na.exclude(j))) ) plt <- plt |> plotly::add_annotations( xref = "paper", yref = "paper", xanchor = "right", yanchor = "bottom", x = 0, y = annotate_n_y, text = "N =", font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", # x = seq_len(nvars) - 1, x = seq_along(Nperbox) - 1, y = 1, text = as.character(Nperbox), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /annotate_n # '-Annotate Mean SD ---- if (annotate_meansd) { Meanperbox <- sapply(x, function(j) mean(na.exclude(j))) |> round(digits = 2) |> format(nsmall = 2) SDperbox <- sapply(x, function(j) sd(na.exclude(j))) |> round(digits = 2) |> format(nsmall = 2) plt <- plt |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", x = seq_along(Meanperbox) - 1, y = 1, # text = as.character(Nperbox), text = paste0(Meanperbox, " (", SDperbox, ")"), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /annotate_meansd # '-htest ---- if (htest != "none") { if (htest_compare == 0) { pvals <- sapply(x[-1], \(v) { suppressWarnings( do.call(htest, list(x = x[[1]], y = v))[["p.value"]] ) }) } y_sb <- starbracket_y(unlist(x), pad = starbracket_pad) if (is.null(htest_y)) { htest_y <- y_sb[["star"]] } plt <- plt |> plotly::add_annotations( xref = if (horizontal) "paper" else "x", # yref = if (horizontal) "x" else "paper", yref = if (horizontal) "x" else "y", yanchor = if (horizontal) "auto" else "top", xanchor = if (horizontal) "center" else "auto", x = if (horizontal) htest_y else seq_along(pvals), # exclude first y = if (horizontal) seq_along(pvals) else htest_y, # text = unname(ifelse(pvals < htest.thresh, "*", "")), text = pval_stars(pvals), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) if (htest_annotate) { test <- switch( htest, `wilcox.test` = "Wilcoxon", `t.test` = "T-test", htest ) plt <- plt |> plotly::add_annotations( xref = "paper", yref = "paper", yanchor = "top", xanchor = "left", x = htest_annotate_x, y = htest_annotate_y, # text = paste0("*", test, " p-val < ", htest.thresh), # text = paste0("* ", test, " p-val < ", htest.thresh), # text = paste0( # '* ', # test, " p-val < ", htest.thresh), text = paste0( test, " p-val:", ' * ', "< .05", ' ** ', "< .01", ' *** ', "< .001" ), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } # / htest!="none" } else { if (use_plotly_group) { # A.2.a Grouped boxplots with [group] ---- # Best to use this for multiple variables x group. # For single variables x group, preferred way it to use # split(var, group) => A1 if (is.null(legend)) { legend <- TRUE } dt <- cbind(data.table::as.data.table(x), group = group) dtlong <- data.table::melt( dt[, ID := seq_len(nrow(dt))], id.vars = c("ID", "group") ) if (is.null(ylab)) { ylab <- "" } .args <- list( data = dtlong, type = type, x = if (horizontal) ~value else ~variable, y = if (horizontal) ~variable else ~value, color = ~group, colors = col2hex(col), showlegend = legend ) if (type == "box") { .args <- c( .args, list( quartilemethod = quartilemethod, boxpoints = boxpoints, alpha = alpha ) ) if (!is.null(hovertext)) { dtlong <- merge(dtlong, cbind(dt[, list(ID)], hovertext)) .args[["text"]] <- dtlong[["hovertext"]] } } if (type == "violin") { .args[["box"]] <- list(visible = violin_box) } cataxis <- list( tickvals = 0:(NCOL(dt) - 2), ticktext = .xnames ) .args <- c(list(width = width, height = height), .args) plt <- do.call(plotly::plot_ly, .args) |> plotly::layout( boxmode = "group", xaxis = if (horizontal) NULL else cataxis, yaxis = if (horizontal) cataxis else NULL ) } else { # A.2.b Grouped boxplots with split and loop ---- # Replaces A.2.a to allow annotation positioning if (is.null(legend)) { legend <- TRUE } dts <- split(data.table::as.data.table(x), group, drop = TRUE) if (is.null(ylab)) { ylab <- "" } if (type == "box") { .args <- list( type = "box", quartilemethod = quartilemethod, boxpoints = boxpoints, alpha = alpha ) } else { .args <- list( type = "violin", box = list(visible = violin_box) ) } varnames <- names(x) nvars <- length(varnames) ngroups <- length(dts) groupnames <- names(dts) xval <- do.call(paste, expand.grid(groupnames, varnames)) # text = xval[i], xval <- factor(xval, levels = xval) boxindex <- 0 # plt <- plotly::plot_ly(type = type) # box or violin .args <- c(list(width = width, height = height), .args) plt <- do.call(plotly::plot_ly, .args) for (i in seq_along(varnames)) { # loop vars for (j in seq_along(dts)) { # loop groups boxindex <- boxindex + 1 plt <- plt |> plotly::add_trace( x = if (horizontal) dts[[j]][[i]] else xval[boxindex], y = if (horizontal) xval[boxindex] else dts[[j]][[i]], name = groupnames[j], meta = xval[boxindex], line = list(color = plotly::toRGB(col[j])), fillcolor = plotly::toRGB(col[j], alpha), marker = list(color = plotly::toRGB(col[j], alpha)), showlegend = legend & (i == nvars), hoverinfo = "all", legendgroup = groupnames[j] ) } } cataxis <- list( type = "category", tickmode = "array", tickvals = (mean(seq_len(ngroups)) + 0:(nvars - 1) * ngroups) - 1, # need -1 if type = "category" ticktext = .xnames, tickangle = cataxis_tickangle, automargin = TRUE ) plt <- plt |> plotly::layout( xaxis = if (horizontal) NULL else cataxis, yaxis = if (horizontal) cataxis else NULL ) # '- Group lines ---- if (nvars > 1 && group_lines) { if (is.null(group_lines_col)) { group_lines_col <- theme[["fg"]] } group_lines_col <- adjustcolor( group_lines_col, group_lines_alpha ) at <- seq((ngroups - .5), (ngroups * (nvars - 1) - .5), by = ngroups) if (horizontal) { plt <- plt |> plotly::layout( shapes = plotly_hline( at, color = group_lines_col, dash = group_lines_dash ) ) } else { plt <- plt |> plotly::layout( shapes = plotly_vline( at, color = group_lines_col, dash = group_lines_dash ) ) } } # '-Annotate N ---- if (annotate_n) { Nperbox <- Filter( function(i) i > 0, c(t(sapply(dts, function(i) { sapply(i, function(j) length(na.exclude(j))) }))) ) plt <- plt |> plotly::add_annotations( xref = "paper", yref = "paper", xanchor = "right", yanchor = "bottom", x = 0, y = annotate_n_y, text = "N =", font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", x = seq_len(nvars * ngroups) - 1, y = 1, text = as.character(Nperbox), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /annotate_n # '-Annotate Mean SD ---- if (annotate_meansd) { Meanperbox <- c(t(sapply(dts, function(i) { sapply(i, function(j) mean(na.exclude(j))) }))) |> round(digits = 2) |> format(nsmall = 2) SDperbox <- c(t(sapply(dts, function(i) { sapply(i, function(j) sd(na.exclude(j))) }))) |> round(digits = 2) |> format(nsmall = 2) plt <- plt |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", x = seq_len(nvars * ngroups) - 1, y = 1, text = paste0(Meanperbox, " (", SDperbox, ")"), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /annotate_meansd # '-Annotate Mean ---- if (annotate_mean) { Meanperbox <- c(t(sapply(dts, function(i) { sapply(i, function(j) mean(na.exclude(j))) }))) |> round(digits = 1) |> format(nsmall = 1) plt <- plt |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", x = seq_len(nvars * ngroups) - 1, y = 1, text = Meanperbox, font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /annotate_mean # '- htest ---- if (htest != "none" || !is.null(pvals)) { # dts list elements are groups; columns are variables # pvals is N groups -1 x N vars if (is.null(pvals)) { if (htest_compare == 0) { pvals <- sapply(seq_len(nvars), \(cid) { sapply(2:ngroups, \(gid) { suppressWarnings( do.call( htest, list( x = dts[[1]][[cid]], y = dts[[gid]][[cid]] ) )[["p.value"]] ) }) }) pvals <- c(rbind(1, pvals)) } else if (htest_compare == 2) { pvals <- rep(1, nvars * ngroups) pvals[seq(2, ngroups * nvars, 2)] <- lapply( seq_len(nvars), \(cid) { lapply(seq(htest_compare, ngroups, htest_compare), \(gid) { suppressWarnings( do.call( htest, list( x = dts[[gid - 1]][[cid]], y = dts[[gid]][[cid]] ) )[["p.value"]] ) }) } ) |> unlist() } } # if brackets are drawn, center stars above them, otherwise # center stars above boxes axshift <- if (htest_compare == 2) 1.5 else 1 y_sb <- starbracket_y(unlist(x), pad = starbracket_pad) if (is.null(htest_y)) { htest_y <- y_sb[["star"]] } plt <- plt |> plotly::add_annotations( xref = if (horizontal) "paper" else "x", # yref = if (horizontal) "x" else "paper", yref = if (horizontal) "x" else "y", yanchor = if (horizontal) "auto" else "top", xanchor = if (horizontal) "center" else "auto", x = if (horizontal) { htest_y } else { seq_len(nvars * ngroups) - axshift }, y = if (horizontal) { seq_len(nvars * ngroups) - axshift } else { htest_y }, # text = unname(ifelse(pvals < htest.thresh, "*", "")), text = pval_stars(pvals), font = list( family = theme[["font_family"]], size = font_size, color = htest_star_col ), showarrow = FALSE ) if (htest_annotate) { test <- switch( htest, `wilcox.test` = "Wilcoxon", `t.test` = "T-test", htest ) plt <- plt |> plotly::add_annotations( xref = "paper", yref = "paper", yanchor = "top", xanchor = "left", x = htest_annotate_x, y = htest_annotate_y, # text = paste0("*", test, " p-val < ", htest.thresh), # text = paste0("* ", test, " p-val < ", htest.thresh), # text = paste0( # '* ', # test, " p-val < ", htest.thresh # ), text = paste0( test, " p-val:", ' * ', "< .05", ' ** ', "< .01", ' *** ', "< .001" ), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } # /htest.annotate # '- htest brackets for htest.compare == 2 ---- if (htest_compare == 2) { for (i in seq(2, ngroups * nvars, 2)) { if (pvals[i] < .05) { # y_bracket <- bracket_y(unlist(x)) plt <- plt |> plotly::add_trace( x = c(rep(xval[i - 1], 2), rep(xval[i], 2)), y = y_sb[["bracket"]], type = "scatter", mode = "lines", inherit = FALSE, line = list(color = htest_bracket_col, width = 1), showlegend = FALSE ) } } } } # /htest grouped } } } else { # B. Time-binned boxplots ---- time_bin <- match.arg(time_bin) if (is.null(xlab)) { xlab <- "" } if (is.null(ylab)) { ylab <- "" } if (is.null(legend)) { legend <- TRUE } dt <- data.table::as.data.table(x) if (!is.null(group)) { dt[, group := group] } if (!is.null(hovertext)) { dt[, hovertext := hovertext] } dt[, timeperiod := date2factor(time, time_bin)] |> setkey(timeperiod) Npertimeperiod <- dt[levels(timeperiod)][, lapply(.SD, \(i) length(na.exclude(i))), by = timeperiod ] |> setorder() ## Long data # appease R CMD check ID <- timeperiod <- NULL dtlong <- data.table::melt( dt[, ID := .I], id.vars = c( "ID", "timeperiod", mgetnames(dt, "group", "hovertext") ) ) if (is.null(group)) { .args <- list( data = dtlong, type = type, x = if (horizontal) ~value else ~timeperiod, y = if (horizontal) ~timeperiod else ~value, color = ~variable, colors = col2hex(col), showlegend = legend ) } else { .args <- list( data = dtlong, type = type, x = if (horizontal) ~value else ~timeperiod, y = if (horizontal) ~timeperiod else ~value, color = ~group, colors = col2hex(col), showlegend = legend ) } if (!is.null(hovertext)) { .args[["text"]] <- dtlong[["hovertext"]] } if (type == "box") { .args <- c( .args, list( quartilemethod = quartilemethod, boxpoints = boxpoints ) ) } if (type == "violin") { .args[["box"]] <- list(visible = violin_box) } .args <- c(list(width = width, height = height), .args) plt <- do.call(plotly::plot_ly, .args) if (!is.null(group) || nvars > 1) { plt <- plt |> plotly::layout(boxmode = "group") } # '-Annotate N ---- if (is.null(group) && annotate_n) { Nperbox <- Npertimeperiod[[2]] # include zeros plt <- plt |> plotly::add_annotations( xref = "paper", yref = "paper", xanchor = "right", yanchor = "bottom", x = 0, y = annotate_n_y, text = "N =", font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) |> plotly::add_annotations( xref = "x", yref = "paper", yanchor = "bottom", x = seq_along(Nperbox) - 1, y = 1, text = paste(Nperbox), font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } # /time-binned boxplots # Layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = font_size, color = theme[["tick_labels_col"]] ) .legend <- list( x = legend_xy[1], y = legend_xy[2], xanchor = legend_xanchor, yanchor = legend_yanchor, bgcolor = "#ffffff00", font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation ) yaxis_title <- if (horizontal) xlab else ylab plt <- plotly::layout( plt, yaxis = list( title = list(text = yaxis_title, standoff = ylab_standoff), type = if (horizontal) xaxis_type else NULL, titlefont = f, showgrid = if (horizontal) FALSE else theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = if (horizontal) NA else tick_col, tickfont = tickfont, zeroline = FALSE, automargin = automargin_y, range = ylim ), xaxis = list( title = if (horizontal) ylab else xlab, type = if (horizontal) NULL else xaxis_type, titlefont = f, showgrid = if (horizontal) theme[["grid"]] else FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = if (horizontal) tick_col else NA, tickfont = tickfont, automargin = automargin_x, range = xlim ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, legend = .legend, # boxgap = boxgap, boxgroupgap = boxgroupgap ) # Config ---- plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ), mathjax = mathjax ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_box.R ================================================ FILE: R/draw_calibration.R ================================================ # draw_calibration.R # ::rtemis:: # 2023 EDG rtemis.org #' Draw calibration plot #' #' @param true_labels Factor or list of factors with true class labels #' @param predicted_prob Numeric vector or list of numeric vectors with predicted probabilities #' @param bin_method Character: "quantile" or "equidistant": Method to bin the estimated #' probabilities. #' @param n_bins Integer: Number of windows to split the data into #' @param binclasspos Integer: Index of the positive class. The convention used in the package is #' the second level is the positive class. #' @param main Character: Main title #' @param subtitle Character: Subtitle, placed bottom right of plot #' @param xlab Character: x-axis label #' @param ylab Character: y-axis label #' @param show_marginal_x Logical: Add marginal plot of distribution of estimated probabilities #' @param marginal_x_y Numeric: y position of marginal plot #' @param marginal_col Character: Color of marginal plot #' @param marginal_size Numeric: Size of marginal plot #' @param mode Character: "lines", "markers", "lines+markers": How to plot. #' @param show_brier Logical: If TRUE, add Brier scores to trace names. #' @param theme `Theme` object. #' @param filename Character: Path to save output. #' @param ... Additional arguments passed to [draw_scatter] #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Synthetic data with n cases #' n <- 500L #' true_labels <- factor(sample(c("A", "B"), n, replace = TRUE)) #' # Synthetic probabilities where A has mean 0.25 and B has mean 0.75 #' predicted_prob <- ifelse(true_labels == "A", #' rbeta(n, 2, 6), #' rbeta(n, 6, 2) #' ) #' draw_calibration(true_labels, predicted_prob) draw_calibration <- function( true_labels, predicted_prob, n_bins = 10L, bin_method = c("quantile", "equidistant"), binclasspos = 2L, main = NULL, subtitle = NULL, xlab = "Mean predicted probability", ylab = "Empirical risk", show_marginal_x = TRUE, marginal_x_y = -.02, marginal_col = NULL, marginal_size = 10, mode = "markers+lines", show_brier = TRUE, theme = choose_theme(getOption("rtemis_theme")), filename = NULL, ... ) { # Arguments ---- bin_method <- match.arg(bin_method) if (!is.list(true_labels)) { true_labels <- list(true_labels = true_labels) } if (!is.list(predicted_prob)) { predicted_prob <- list(estimated_prob = predicted_prob) } # Ensure same number of inputs stopifnot(length(true_labels) == length(predicted_prob)) # Theme ---- check_is_S7(theme, Theme) pos_class <- lapply(true_labels, \(x) { levels(x)[binclasspos] }) # Ensure same positive class stopifnot(length(unique(unlist(pos_class))) == 1) # Create windows if (bin_method == "equidistant") { breaks <- lapply(seq_along(predicted_prob), \(x) { seq(0, 1, length.out = n_bins + 1) }) } else if (bin_method == "quantile") { breaks <- lapply(predicted_prob, \(x) { quantile(x, probs = seq(0, 1, length.out = n_bins + 1)) }) } # Calculate the mean probability in each window mean_bin_prob <- lapply(seq_along(predicted_prob), \(i) { sapply(seq_len(n_bins), \(j) { mean(predicted_prob[[i]][ predicted_prob[[i]] >= breaks[[i]][j] & predicted_prob[[i]] < breaks[[i]][j + 1] ]) }) }) names(mean_bin_prob) <- names(predicted_prob) # Calculate the proportion of condition positive cases in each window window_empirical_risk <- lapply(seq_along(predicted_prob), \(i) { sapply(seq_len(n_bins), \(j) { idl <- predicted_prob[[i]] >= breaks[[i]][j] & predicted_prob[[i]] < breaks[[i]][j + 1] sum(true_labels[[i]][idl] == pos_class[[i]]) / sum(idl) }) }) names(window_empirical_risk) <- names(predicted_prob) # Add Brier score if (show_brier) { .brier_score <- sapply(seq_along(predicted_prob), \(i) { brier_score( true_int = labels2int(true_labels[[i]], binclasspos), predicted_prob = predicted_prob[[i]] ) }) names(window_empirical_risk) <- paste0( names(window_empirical_risk), " (Brier=", round(.brier_score, 3), ")" ) } # Calculate confidence intervals # confint <- sapply(seq_len(n_bins), \(i) { # events <- length(true_labels[true_labels == pos_class & predicted_prob >= breaks[i] & predicted_prob < breaks[i + 1]]) # total <- length(predicted_prob >= breaks[i] & predicted_prob < breaks[i + 1]) # suppressWarnings(pt <- prop.test( # events, total, # conf.level = conf_level # )) # pt$conf.int # }) # Plot if (is.null(subtitle)) { subtitle <- paste( "using", n_bins, if (bin_method == "quantile") "quantiles" else "equidistant bins" ) } # if (is.null(subtitle) && !is.na(subtitle)) .subtitle <- paste0(subtitle, "\n", .subtitle) plt <- draw_scatter( x = mean_bin_prob, y = window_empirical_risk, main = main, # subtitle = paste("", .subtitle, ""), subtitle = subtitle, subtitle_x = 1, subtitle_y = 0, subtitle_yref = "y", subtitle_xanchor = "right", subtitle_yanchor = "bottom", xlab = xlab, ylab = ylab, show_marginal_x = show_marginal_x, marginal_x = predicted_prob, marginal_x_y = marginal_x_y, marginal_size = marginal_size, axes_square = TRUE, diagonal = TRUE, xlim = c(0, 1), ylim = c(0, 1), mode = mode, theme = theme, filename = filename, ... ) # Add marginal.x ---- # Using estimated probabilities # if (marginal.x) { # if (is.null(marginal.col)) marginal.col <- plotly::toRGB(theme[["fg"]], alpha = .5) # for (i in seq_along(mean_bin_prob)) { # plt <- plotly::add_trace( # plt, # x = predicted_prob[[i]], # y = rep(-.02, length(predicted_prob[[i]])), # type = "scatter", # mode = "markers", # marker = list( # color = marginal.col, # size = marginal.size, # symbol = "line-ns-open" # ), # showlegend = FALSE, # hoverinfo = "x" # ) # } # } # /marginal.x plt } # /rtemis::draw_calibration ================================================ FILE: R/draw_confusion.R ================================================ # draw_confusion.R # ::rtemis:: # 2024- EDG rtemis.org #' Plot confusion matrix #' #' @param x `ClassificationMetrics` object produced by [classification_metrics] or confusion matrix #' where rows are the reference and columns are the estimated classes. For binary classification, #' the first row and column are the positive class. #' @param xlab Character: x-axis label. Default is "Predicted". #' @param ylab Character: y-axis label. Default is "Reference". #' @param true_col Color for true positives & true negatives. #' @param false_col Color for false positives & false negatives. #' @param font_size Integer: font size. #' @param main Character: plot title. #' @param main_y Numeric: y position of the title. #' @param main_yanchor Character: y anchor of the title. #' @param theme `Theme` object. #' @param margin List: Plot margins. #' @param filename Character: file name to save the plot. Default is NULL. #' @param file_width Numeric: width of the file. Default is 500. #' @param file_height Numeric: height of the file. Default is 500. #' @param file_scale Numeric: scale of the file. Default is 1. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Assume positive class is "b" #' true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) #' predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a")) #' predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) #' metrics <- classification_metrics(true_labels, predicted_labels, predicted_prob) #' draw_confusion(metrics) draw_confusion <- function( x, xlab = "Predicted", ylab = "Reference", true_col = "#43A4AC", false_col = "#FA9860", font_size = 18, main = NULL, main_y = 1, main_yanchor = "bottom", theme = choose_theme(getOption("rtemis_theme")), margin = list(l = 20, r = 5, b = 5, t = 20), # write to file filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) { # Input ---- if (S7_inherits(x, ClassificationMetrics)) { x <- x@metrics[["Confusion_Matrix"]] } if (is.null(dim(x)) || ncol(x) != nrow(x)) { cli::cli_abort("The confusion matrix must be a square matrix.") } # Metrics ---- nclasses <- ncol(x) total <- sum(x) class_totals <- rowSums(x) condition_negative <- total - class_totals predicted_totals <- colSums(x) hits <- diag(x) # misses = class_totals - hits class_sensitivity <- hits / class_totals true_negative <- total - predicted_totals - (class_totals - hits) class_specificity <- true_negative / condition_negative class_balancedAccuracy <- .5 * (class_sensitivity + class_specificity) # PPV = true positive / predicted condition positive class_ppv <- hits / predicted_totals # NPV = true negative / predicted condition negative class_npv <- true_negative / (total - predicted_totals) # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) main_col <- plotly::toRGB(theme[["main_col"]]) # Colors ---- pos_color <- colorRamp(colors = c(theme[["bg"]], true_col)) neg_color <- colorRamp(colors = c(theme[["bg"]], false_col)) # Fonts ---- f <- list( family = theme[["font_family"]], size = font_size, color = theme[["labs_col"]] ) # Plot ---- plt <- plotly::plot_ly( type = "scatter", mode = "lines" ) # Add colored tiles & counts ---- for (i in seq_len(nclasses)) { for (j in seq_len(nclasses)) { plt <- make_plotly_conf_tile( p = plt, x = x, i = i, j = j, pos_color = pos_color, neg_color = neg_color, font_size = font_size, theme = theme ) } } # Layout ---- plt <- plotly::layout( plt, xaxis = list( side = "above", showticklabels = FALSE, showgrid = FALSE, zeroline = FALSE ), yaxis = list( showticklabels = FALSE, showgrid = FALSE, zeroline = FALSE, autorange = "reversed", scaleanchor = "x", scaleratio = 1 ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]], yref = "paper", y = main_y, yanchor = main_yanchor ), showlegend = FALSE, paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin ) # /layout # Class labels ---- # Add class labels above and to the left of the plot # Left plt <- plotly::add_annotations( plt, x = rep(-0.125, nclasses), y = seq_len(nclasses) - 0.5, text = colnames(x), # textposition = "middle right", font = f, showarrow = FALSE, textangle = -90 ) # Above plt <- plotly::add_annotations( plt, x = seq_len(nclasses) - 0.5, y = rep(-0.125, nclasses), text = colnames(x), # textposition = "bottom center", font = f, showarrow = FALSE ) # x-axis label "Predicted" plt <- plotly::add_annotations( plt, x = nclasses / 2, y = ifelse(nclasses == 2, -.3, -0.5), text = xlab, font = f, showarrow = FALSE ) # y-axis label "Reference" plt <- plotly::add_annotations( plt, x = ifelse(nclasses == 2, -.3, -0.5), y = nclasses / 2, text = ylab, font = f, showarrow = FALSE, textangle = -90 ) # Metrics ---- if (nclasses == 2) { # Sens./Spec. ---- # Rect: Sens./Spec. bg plt <- plotly::add_trace( plt, x = c(nclasses, nclasses + 0.3, nclasses + 0.3, nclasses), y = c(0, 0, nclasses, nclasses), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075), showlegend = FALSE ) # Text: Sens. & Spec. plt <- plotly::add_annotations( plt, x = rep(nclasses + 0.15, 2), y = c(.5, 1.5), text = paste0( c("Sensitivity\n", "Specificity\n"), c(ddSci(class_sensitivity[1], 3), ddSci(class_specificity[1], 3)) ), font = f, showarrow = FALSE, textangle = -90 ) # PPV/NPV ---- # Rect: PPV/NPV bg plt <- plotly::add_trace( plt, x = c(0, nclasses, nclasses, 0, 0), y = c(nclasses, nclasses, nclasses + .3, nclasses + .3, nclasses), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075), showlegend = FALSE ) # Text: PPV & NPV plt <- plotly::add_annotations( plt, x = c(.5, 1.5), y = rep(nclasses + 0.15, 2), text = paste0( c("PPV\n", "NPV\n"), c(ddSci(class_ppv[1], 3), ddSci(class_npv[1], 3)) ), font = f, showarrow = FALSE ) } else { # PPV ---- # Text: "PPV" at bottom left corner plt <- plotly::add_annotations( plt, x = -0.05, y = nclasses + .1, xanchor = "right", yanchor = "middle", text = "PPV", font = f, showarrow = FALSE ) # Rect: PPV bg plt <- plotly::add_trace( plt, x = c(0, nclasses, nclasses, 0, 0), y = c(nclasses, nclasses, nclasses + 0.2, nclasses + 0.2, nclasses), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075), showlegend = FALSE ) # Text: Per-class PPV for (i in seq_len(nclasses)) { plt <- plotly::add_annotations( plt, x = i - 0.5, y = nclasses + .1, text = ddSci(class_ppv[i], 3), font = f, showarrow = FALSE ) } # NPV ---- # Label: "NPV" at bottom left corner plt <- plotly::add_annotations( plt, x = -0.05, y = nclasses + .3, xanchor = "right", yanchor = "middle", text = "NPV", font = f, showarrow = FALSE ) # Rect: NPV bg plt <- plotly::add_trace( plt, x = c(0, nclasses, nclasses, 0, 0), y = c( nclasses + 0.2, nclasses + 0.2, nclasses + 0.4, nclasses + 0.4, nclasses + 0.2 ), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .05), showlegend = FALSE ) # Text: Per-class NPV for (i in seq_len(nclasses)) { plt <- plotly::add_annotations( plt, x = i - 0.5, y = nclasses + .3, text = ddSci(class_npv[i], 3), font = f, showarrow = FALSE ) } # Sensitivity ---- # Label: "Sens." top right vertically plt <- plotly::add_annotations( plt, x = nclasses + 0.1, y = -.05, yanchor = "bottom", text = "Sens.", font = f, showarrow = FALSE, textangle = -90 ) # Rect: Sens. bg plt <- plotly::add_trace( plt, x = c(nclasses, nclasses + 0.2, nclasses + 0.2, nclasses), y = c(0, 0, nclasses, nclasses), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075), showlegend = FALSE ) # Text: Per-class Sens. for (i in seq_len(nclasses)) { plt <- plotly::add_annotations( plt, x = nclasses + 0.1, y = i - 0.5, text = ddSci(class_sensitivity[i], 3), font = f, showarrow = FALSE, textangle = -90 ) } # Specificity ---- # Label: "Spec." top right vertically plt <- plotly::add_annotations( plt, x = nclasses + 0.3, y = -.05, yanchor = "bottom", text = "Spec.", font = f, showarrow = FALSE, textangle = -90 ) # Rect: Spec. bg plt <- plotly::add_trace( plt, x = c(nclasses + 0.2, nclasses + 0.4, nclasses + 0.4, nclasses + 0.2), y = c(0, 0, nclasses, nclasses), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .05), showlegend = FALSE ) # Text: Per-class Spec. for (i in seq_len(nclasses)) { plt <- plotly::add_annotations( plt, x = nclasses + 0.3, y = i - 0.5, text = ddSci(class_specificity[i], 3), font = f, showarrow = FALSE, textangle = -90 ) } } # Balanced Accuracy ---- # Rect: BA bg ba_pad <- ifelse(nclasses == 2, 0.3, 0.4) plt <- plotly::add_trace( plt, x = c(nclasses, nclasses + ba_pad, nclasses + ba_pad, nclasses), y = c(nclasses, nclasses, nclasses + ba_pad, nclasses + ba_pad), line = list(color = "transparent"), fill = "toself", fillcolor = plotly::toRGB(theme[["fg"]], alpha = .025), showlegend = FALSE ) # Text: Balanced accuracy ba_pad <- ifelse(nclasses == 2, 0.15, 0.2) ba <- ifelse( nclasses == 2, class_balancedAccuracy[1], mean(class_balancedAccuracy) ) plt <- plotly::add_annotations( plt, x = nclasses + ba_pad, y = nclasses + ba_pad, xanchor = "center", yanchor = "middle", text = paste0("BA\n", ddSci(ba, 3)), font = f, showarrow = FALSE ) # Disable hoverinfo plt <- plotly::style(plt, hoverinfo = "none") # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly return(plt) } # /rtemis::draw_confusion #' Make plotly confusion matrix tile #' #' @author EDG #' @keywords internal #' @noRd make_plotly_conf_tile <- function( p, x, i, j, pos_color, neg_color, font_size, theme, xref = "x", yref = "y" ) { val <- x[i, j] / sum(x[i, ]) col <- if (i == j) { pos_color(val) } else { neg_color(val) } col <- rgb(col[1], col[2], col[3], maxColorValue = 255) # Add colored tile p <- plotly::add_trace( p, x = c(j - 1, j - 1, j, j, j - 1), y = c(i, i - 1, i - 1, i, i), line = list(color = "transparent"), fill = "toself", fillcolor = col ) # Add text p <- plotly::add_trace( p, x = j - 0.5, y = i - 0.5, mode = "text", text = paste0("", x[i, j], ""), textposition = "middle center", textfont = list( family = theme[["font_family"]], color = ifelse(val > 0.5, theme[["bg"]], theme[["fg"]]), size = font_size ), showlegend = FALSE ) return(p) } # /rtemis::make_plotly_conf_tile ================================================ FILE: R/draw_dist.R ================================================ # draw_dist.R # ::rtemis:: # 2019- EDG rtemis.org # check whether list is reordered with ridge #' Draw Distributions using Histograms and Density Plots #' #' Draw Distributions using Histograms and Density Plots using `plotly`. #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' If input is data.frame, non-numeric variables will be removed. #' #' @param x Numeric vector / data.frame / list: Input. If not a vector, each column / each element is drawn. #' @param type Character: "density" or "histogram". #' @param mode Character: "overlap", "ridge". How to plot different groups; on the same axes ("overlap"), or on separate plots with the same x-axis ("ridge"). #' @param group Vector: Will be converted to factor; levels define group members. #' @param main Character: Main title for the plot. #' @param xlab Character: Label for the x-axis. #' @param ylab Character: Label for the y-axis. #' @param col Color: Colors for the plot. #' @param alpha Numeric: Alpha transparency for plot elements. #' @param plot_bg Color: Background color for plot area. #' @param theme `Theme` object. #' @param palette Character: Color palette to use. #' @param axes_square Logical: If TRUE, draw a square plot to fill the graphic device. Default = FALSE. #' @param group_names Character: Names for the groups. #' @param font_size Numeric: Font size for plot text. #' @param font_alpha Numeric: Alpha transparency for font. #' @param legend Logical: If TRUE, draw legend. Default = NULL, which will be set to TRUE if x is a list of more than 1 element. #' @param legend_xy Numeric, vector, length 2: Relative x, y position for legend. Default = c(0, 1). #' @param legend_col Color: Color for the legend text. #' @param legend_bg Color: Background color for legend. #' @param legend_border_col Color: Border color for legend. #' @param bargap Numeric: The gap between adjacent histogram bars in plot fraction. #' @param vline Numeric, vector: If defined, draw a vertical line at this x value(s). #' @param vline_col Color: Color for `vline`. #' @param vline_width Numeric: Width for `vline`. #' @param vline_dash Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot". #' @param text Character: If defined, add this text over the plot. #' @param text_x Numeric: x-coordinate for `text`. #' @param text_xref Character: "x": `text_x` refers to plot's x-axis; "paper": `text_x` refers to plotting area from 0-1. #' @param text_xanchor Character: "auto", "left", "center", "right". #' @param text_y Numeric: y-coordinate for `text`. #' @param text_yref Character: "y": `text_y` refers to plot's y-axis; "paper": `text_y` refers to plotting area from 0-1. #' @param text_yanchor Character: "auto", "top", "middle", "bottom". #' @param text_col Color: Color for `text`. #' @param margin List: Margins for the plot. #' @param automargin_x Logical: If TRUE, automatically adjust x-axis margins. #' @param automargin_y Logical: If TRUE, automatically adjust y-axis margins. #' @param zerolines Logical: If TRUE, draw lines at y = 0. #' @param density_kernel Character: Kernel to use for density estimation. #' @param density_bw Character: Bandwidth to use for density estimation. #' @param histnorm Character: NULL, "percent", "probability", "density", "probability density". #' @param histfunc Character: "count", "sum", "avg", "min", "max". #' @param hist_n_bins Integer: Number of bins to use if type = "histogram". #' @param barmode Character: Barmode for histogram. One of "overlay", "stack", "relative", "group". #' @param ridge_sharex Logical: If TRUE, draw single x-axis when `mode = "ridge"`. #' @param ridge_y_labs Logical: If TRUE, show individual y labels when `mode = "ridge"`. #' @param ridge_order_on_mean Logical: If TRUE, order groups by mean value when `mode = "ridge"`. #' @param displayModeBar Logical: If TRUE, display the mode bar. #' @param modeBar_file_format Character: File format for mode bar. Default = "svg". #' @param width Numeric: Force plot size to this width. Default = NULL, i.e. fill available space. #' @param height Numeric: Force plot size to this height. Default = NULL, i.e. fill available space. #' @param filename Character: Path to file to save static plot. #' @param file_width Integer: File width in pixels for when `filename` is set. #' @param file_height Integer: File height in pixels for when `filename` is set. #' @param file_scale Numeric: If saving to file, scale plot by this number. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Will automatically use only numeric columns #' draw_dist(iris) #' draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]]) draw_dist <- function( x, type = c("density", "histogram"), mode = c("overlap", "ridge"), group = NULL, main = NULL, xlab = NULL, ylab = NULL, col = NULL, alpha = .75, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), axes_square = FALSE, group_names = NULL, font_size = 16, font_alpha = .8, legend = NULL, legend_xy = c(0, 1), legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", bargap = .05, vline = NULL, vline_col = theme[["fg"]], vline_width = 1, vline_dash = "dot", text = NULL, text_x = 1, text_xref = "paper", text_xanchor = "left", text_y = 1, text_yref = "paper", text_yanchor = "top", text_col = theme[["fg"]], margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), automargin_x = TRUE, automargin_y = TRUE, zerolines = FALSE, density_kernel = "gaussian", density_bw = "SJ", histnorm = c( "", "density", "percent", "probability", "probability density" ), histfunc = c("count", "sum", "avg", "min", "max"), hist_n_bins = 20, barmode = "overlay", # ?alternatives ridge_sharex = TRUE, ridge_y_labs = FALSE, ridge_order_on_mean = TRUE, displayModeBar = TRUE, modeBar_file_format = "svg", width = NULL, height = NULL, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- type <- match.arg(type) mode <- match.arg(mode) if (!is.null(main)) { main <- paste0("", main, "") } .xname <- labelify(deparse(substitute(x))) # Data ---- # '- Group ---- if (!is.null(group)) { if (is.factor(group)) { group <- droplevels(group) } else { group <- as.factor(group) } x <- as.data.frame(x) # Can't have multiple vectors in `x` and `group` if (length(x) > 1 && !is.null(group)) { cli::cli_abort( "Can't have both multiple vectors in `x` and `group` defined." ) } x <- split(x, group) x <- sapply(x, as.vector) if (is.null(group_names)) { group_names <- levels(group) } names(x) <- .names <- group_names } if (!is.list(x)) { x <- list(x) } n_groups <- length(x) if (n_groups == 1 && is.null(xlab)) { xlab <- .xname } # Remove non-numeric vectors which_nonnum <- which(sapply(x, function(i) !is.numeric(i))) if (length(which_nonnum) > 0) { for (i in rev(which_nonnum)) { x[[i]] <- NULL } } if (is.null(legend)) { legend <- length(x) > 1 } if (!is.null(group_names)) { .names <- group_names } else { .names <- labelify(names(x)) } if (is.null(.names)) { .names <- paste("Feature", seq_along(x)) } # Colors ---- if (is.character(palette)) { palette <- get_palette(palette) } n_groups <- length(x) if (is.null(col)) { col <- recycle(palette, seq(n_groups))[seq(n_groups)] } if (length(col) < n_groups) { col <- rep(col, n_groups / length(col)) } # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) if (!theme[["axes_visible"]]) { tick_col <- labs_col <- "transparent" } # '- Axis font ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) # '- Tick font ---- tickfont <- list( family = theme[["font_family"]], size = font_size, color = theme[["tick_labels_col"]] ) # Derived if (is.null(legend_col)) { legend_col <- labs_col } # Size ---- if (axes_square) { width <- height <- min(dev.size("px")) - 10 } # Ridge ---- if (mode == "ridge") { axis <- list( showline = FALSE, # mirror = axes_mirrored, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = zerolines ) ridge_groups <- if (ridge_order_on_mean) { order(sapply(x, mean), decreasing = TRUE) } else { seq_len(n_groups) } } # plotly ---- # z <- if (mode == "overlap") rep(1, n_groups) else seq_len(n_groups) # plt <- vector("list", n_groups) .text <- lapply(x, function(i) { paste( "mean =", ddSci(mean(i, na.rm = TRUE)), "\nsd =", ddSci(sd(i, na.rm = TRUE)) ) }) # '- { Density } ---- if (type == "density") { if (is.null(ylab)) { ylab <- "Density" } xl_density <- lapply( x, density, na.rm = TRUE, kernel = density_kernel, bw = density_bw ) if (mode == "overlap") { # '- Density overlap ---- plt <- plotly::plot_ly( width = width, height = height ) for (i in seq_len(n_groups)) { plt <- plotly::add_trace( plt, x = xl_density[[i]][["x"]], y = xl_density[[i]][["y"]], type = "scatter", mode = "none", fill = "tozeroy", fillcolor = plotly::toRGB(col[[i]], alpha), name = .names[i], hovertext = .text[[i]], hoverinfo = "text", showlegend = legend ) } } else { # '- Density ridge ---- plt <- lapply(ridge_groups, function(i) { plotly::plot_ly( x = xl_density[[i]][["x"]], y = xl_density[[i]][["y"]], type = "scatter", mode = "none", fill = "tozeroy", fillcolor = plotly::toRGB(col[[i]], alpha), name = .names[i], hovertext = .text[[i]], hoverinfo = "text", showlegend = legend, width = width, height = height ) |> plotly::layout( xaxis = axis, yaxis = c( list( title = list( text = .names[i], font = f ) ), axis ) ) }) } } # End mode == "density" # '- { Histogram } ---- if (type == "histogram") { # https://plotly.com/r/reference/#histogram-bingroup bingroup <- 1 histnorm <- match.arg(histnorm) histfunc <- match.arg(histfunc) # if (is.null(ylab)) ylab <- "Count" if (mode == "overlap") { # '- Histogram overlap ---- plt <- plotly::plot_ly( width = width, height = height ) for (i in seq_len(n_groups)) { plt <- plotly::add_trace( plt, x = x[[i]], type = "histogram", marker = list(color = plotly::toRGB(col[i], alpha)), name = .names[i], hovertext = .text[[i]], hoverinfo = "text", histnorm = histnorm, histfunc = histfunc, nbinsx = hist_n_bins, showlegend = legend, bingroup = bingroup ) } plt <- plotly::layout(plt, barmode = barmode, bargap = bargap) } else { # '- Histogram ridge ---- plt <- lapply(ridge_groups, function(i) { plotly::plot_ly( x = x[[i]], type = "histogram", histnorm = histnorm, histfunc = histfunc, nbinsx = hist_n_bins, marker = list(color = plotly::toRGB(col[i], alpha)), name = .names[i], hovertext = .text[[i]], hoverinfo = "text", showlegend = legend, width = width, height = height, bingroup = bingroup ) |> plotly::layout( xaxis = axis, yaxis = c( list( title = list( text = .names[i], font = f ) ), axis ), bargap = bargap ) }) } } if (mode == "ridge") { plt <- plotly::subplot( plt, nrows = n_groups, shareX = ridge_sharex, # shareY = ridge_sharey, titleY = ridge_y_labs ) } # Layout ---- zerocol <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]]) # '- layout ---- .legend <- list( x = legend_xy[1], y = legend_xy[2], font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), bgcolor = legend_bg, bordercolor = legend_border_col ) plt <- plotly::layout( plt, xaxis = list( title = list( text = xlab, font = f ), showline = FALSE, # mirror = axes_mirrored, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE, automargin = automargin_x ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, showlegend = legend, legend = .legend ) if (mode == "overlap") { plt <- plotly::layout( plt, yaxis = list( title = list( text = ylab, font = f ), showline = FALSE, # mirror = axes_mirrored, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = zerolines, zerolinecolor = zerocol, zerolinewidth = theme[["zerolines_lwd"]], automargin = automargin_y ) ) } # vline ---- if (!is.null(vline)) { plt <- plotly::layout( plt, shapes = plotly_vline( vline, color = vline_col, width = vline_width, dash = vline_dash ) ) } # text ---- if (!is.null(text)) { plt <- plotly::layout( plt, annotations = list( text = text, x = text_x, xref = text_xref, xanchor = text_xanchor, y = text_y, yref = text_yref, yanchor = text_yanchor, font = list( color = text_col, family = theme[["font_family"]], size = font_size ), showarrow = FALSE ) ) } # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_dist ================================================ FILE: R/draw_graphd3.R ================================================ # draw_graphD3 # ::rtemis:: # EDG rtemis.org #' Plot graph using \pkg{networkD3} #' #' @param net \pkg{igraph} network. #' @param groups Vector, length n nodes indicating group/cluster/community membership of nodes in `net`. #' @param color_scale D3 colorscale (e.g. `networkD3::JS("d3.scaleOrdinal(d3.schemeCategory20b);")`). #' @param edge_col Color for edges. #' @param node_col Color for nodes. #' @param node_alpha Float \[0, 1\]: Node opacity. #' @param edge_alpha Float \[0, 1\]: Edge opacity. #' @param zoom Logical: If TRUE, graph is zoomable. #' @param legend Logical: If TRUE, display legend for groups. #' @param palette Character vector: Colors to use. #' @param theme `Theme` object. #' @param ... Additional arguments to pass to `networkD3`. #' #' @return `forceNetwork` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' library(igraph) #' g <- make_ring(10) #' draw_graphD3(g) draw_graphD3 <- function( net, groups = NULL, color_scale = NULL, edge_col = NULL, node_col = NULL, node_alpha = .5, edge_alpha = .33, zoom = TRUE, legend = FALSE, palette = get_palette(getOption("rtemis_palette")), theme = choose_theme(getOption("rtemis_theme")), ... ) { # Dependencies ---- check_dependencies("networkD3") # Theme ---- check_is_S7(theme, Theme) netd3 <- networkD3::igraph_to_networkD3(net) if (is.null(groups)) { netd3[["nodes"]][["group"]] <- "A" } else { netd3[["nodes"]][["group"]] <- groups } # Colors ---- if (is.null(node_col) && length(unique(netd3[["nodes"]][["group"]])) == 1) { node_col <- theme[["fg"]] } if (is.null(color_scale)) { if (length(unique(netd3[["nodes"]][["group"]])) == 1) { color_scale <- paste0( 'd3.scaleOrdinal().domain(["A"]).range(["', adjustcolor(node_col, node_alpha), '"]);' ) } else { if (is.character(palette)) { palette <- adjustcolor(unlist(get_palette(palette)), node_alpha) } ngroups <- length(unique(groups)) .groups <- paste0(sort(unique(groups)), collapse = '", "') if (ngroups > length(palette)) { palette <- rep(palette, ngroups / length(palette)) } .colors <- paste0(palette[seq(ngroups)], collapse = '", "') color_scale <- paste0( 'd3.scaleOrdinal().domain(["', .groups, '"]).range(["', .colors, '"]);' ) } } if (is.null(edge_col)) { if (is.null(groups)) { edge_col <- adjustcolor("#18A3AC", edge_alpha) } else { edge_col <- adjustcolor(theme[["fg"]], edge_alpha) } } else { edge_col <- adjustcolor(edge_col, edge_alpha) } # Plot ---- fn <- networkD3::forceNetwork( Links = netd3[["links"]], Nodes = netd3[["nodes"]], Source = "source", Target = "target", NodeID = "name", Group = "group", colourScale = color_scale, linkColour = edge_col, opacity = 1, legend = legend, zoom = zoom ) # fn$x$nodes$border <- border.groups fn <- htmlwidgets::onRender( fn, 'function(el, x) { d3.selectAll("circle").style("stroke", d => "#ffffff00"); }' ) fn } # /rtemis::draw_graphD3 ================================================ FILE: R/draw_graphjs.R ================================================ # draw_graphjs.R # ::rtemis:: # EDG rtemis.org #' Plot network using \pkg{threejs::graphjs} #' #' Interactive plotting of an \pkg{igraph} net using \pkg{threejs}. #' #' @param net \pkg{igraph} network. #' @param vertex_size Numeric: Vertex size. #' @param vertex_col Color for vertices. #' @param vertex_label_col Color for vertex labels. #' @param vertex_label_alpha Numeric: Transparency for `vertex_label_col`. #' @param vertex_frame_col Color for vertex border (frame). #' @param vertex_label Character vector: Vertex labels. Default = NULL, which will keep existing names in `net` if any. Set to NA to avoid printing vertex labels. #' @param vertex_shape Character, vector, length 1 or N nodes: Vertex shape. See `graphjs("vertex_shape")`. #' @param edge_col Color for edges. #' @param edge_alpha Numeric: Transparency for edges. #' @param edge_curved Numeric: Curvature of edges. #' @param edge_width Numeric: Edge thickness. #' @param layout Character: one of: "fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama", corresponding to all the available layouts in \pkg{igraph}. #' @param coords Output of precomputed \pkg{igraph} layout. If provided, `layout` is ignored. #' @param layout_args List of arguments to pass to `layout` function. #' @param cluster Character: one of: "edge_betweenness", "fast_greedy", "infomap", "label_prop", "leading_eigen", "louvain", "optimal", "spinglass", "walktrap", corresponding to all the available \pkg{igraph} clustering functions. #' @param groups Output of precomputed \pkg{igraph} clustering. If provided, `cluster` is ignored. #' @param cluster_config List of arguments to pass to `cluster` function. #' @param cluster_mark_groups Logical: If TRUE, draw polygons to indicate clusters, if `groups` or `cluster` are defined. #' @param cluster_color_vertices Logical: If TRUE, color vertices by cluster membership. #' @param main Character: Main title. #' @param theme `Theme` object. #' @param palette Color vector or name of rtemis palette. #' @param mar Numeric vector, length 4: `par`'s margin argument. #' @param filename Character: If provided, save plot to this filepath. #' @param verbosity Integer: Verbosity level. #' @param ... Extra arguments to pass to `igraph::plot.igraph()`. #' #' @return `scatterplotThree` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' library(igraph) #' g <- make_ring(10) #' draw_graphjs(g) draw_graphjs <- function( net, vertex_size = 1, vertex_col = NULL, vertex_label_col = NULL, vertex_label_alpha = .66, vertex_frame_col = NA, vertex_label = NULL, vertex_shape = "circle", edge_col = NULL, edge_alpha = .5, edge_curved = .35, edge_width = 2, layout = c( "fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama" ), coords = NULL, layout_args = list(), cluster = NULL, groups = NULL, cluster_config = list(), cluster_mark_groups = TRUE, cluster_color_vertices = FALSE, main = "", theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), mar = rep(0, 4), filename = NULL, verbosity = 1L, ... ) { # Dependencies ---- check_dependencies("igraph", "threejs") # Theme ---- check_is_S7(theme, Theme) if (is.character(palette)) { palette <- unname(unlist(get_palette(palette))) } # Vertex names ---- # by default use names in input net. if (!is.null(vertex_label)) { igraph::igraph.options(net, vertex_label = vertex_label) } # Layout ---- layout <- match.arg(layout) if (is.null(coords) && !is.null(layout)) { coords <- do.call( getFromNamespace(paste0("layout_with_", layout), "igraph"), c(list(net, dim = 3), layout_args) ) if (layout == "sugiyama") coords <- coords[["layout"]] } # Cluster ---- if (is.null(groups) && !is.null(cluster)) { groups <- do.call( getFromNamespace(paste0("cluster_", cluster), "igraph"), c(list(net), cluster_config) ) } if (is.null(vertex_col)) { vertex_col <- if (!is.null(groups)) { palette <- recycle(palette, length(unique(groups[["membership"]]))) palette[groups[["membership"]]] } else { theme[["fg"]] } } if (is.null(vertex_label_col)) { vertex_label_col <- theme[["fg"]] } vertex_label_col <- adjustcolor(vertex_label_col, vertex_label_alpha) # Leave edge_col as NULL for auto-coloring with groups if (is.null(edge_col) && is.null(groups)) { edge_col <- "#18A3AC" } # Plot ---- threejs::graphjs( net, layout = coords, vertex.color = vertex_col, vertex.size = vertex_size, vertex.shape = vertex_shape, vertex.label = vertex_label, edge.color = edge_col, edge.alpha = edge_alpha, edge.width = edge_width, main = main, bg = theme[["bg"]], vertex.label.color = vertex_label_col, vertex.frame.color = vertex_frame_col, edge.curved = edge_curved, vertex.label.family = theme[["font_family"]], font.main = theme[["font_family"]], stroke = NULL, verbosity = verbosity, ... ) } # /rtemis::draw_graphjs ================================================ FILE: R/draw_heatmap.R ================================================ # draw_heatmap.R # ::rtemis:: # 2017 EDG rtemis.org #' Interactive Heatmaps #' #' Draw interactive heatmaps using `heatmaply`. #' #' @details #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' 'heatmaply' unfortunately forces loading of the 'colorspace' namespace. #' #' @param x Input matrix. #' @param Rowv Logical or dendrogram. If Logical: Compute dendrogram and reorder rows. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at `heatmaply::heatmaply("Rowv")`. #' @param Colv Logical or dendrogram. If Logical: Compute dendrogram and reorder columns. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at `heatmaply::heatmaply("Colv")`. #' @param cluster Logical: If TRUE, set `Rowv` and `Colv` to TRUE. #' @param symm Logical: If TRUE, treat `x` symmetrically - `x` must be a square matrix. #' @param cellnote Matrix with values to be displayed on hover. Defaults to `ddSci(x)`. #' @param colorgrad_n Integer: Number of colors in gradient. Default = 101. #' @param colors Character vector: Colors to use in gradient. #' @param space Character: Color space to use. Default = "rgb". #' @param lo Character: Color for low values. Default = "#18A3AC". #' @param lomid Character: Color for low-mid values. #' @param mid Character: Color for mid values. #' @param midhi Character: Color for mid-high values. #' @param hi Character: Color for high values. Default = "#F48024". #' @param k_row Integer: Number of desired number of groups by which to color dendrogram branches in the rows. Default = 1. #' @param k_col Integer: Number of desired number of groups by which to color dendrogram branches in the columns. Default = 1. #' @param grid_gap Integer: Space between cells. Default = 0 (no space). #' @param limits Float, length 2: Determine color range. Default = NULL, which automatically centers values around 0. #' @param margins Float, length 4: Heatmap margins. #' @param main Character: Main title. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param key_title Character: Title for the color key. #' @param showticklabels Logical: If TRUE, show tick labels. #' @param colorbar_len Numeric: Length of the colorbar. #' @param row_side_colors Data frame: Column names will be label names, cells should be label colors. See `heatmaply::heatmaply("row_side_colors")`. #' @param row_side_palette Color palette function. See `heatmaply::heatmaply("row_side_palette")`. #' @param col_side_colors Data frame: Column names will be label names, cells should be label colors. See `heatmaply::heatmaply("col_side_colors")`. #' @param col_side_palette Color palette function. See `heatmaply::heatmaply("col_side_palette")`. #' @param font_size Numeric: Font size. #' @param padding Numeric: Padding between cells. #' @param displayModeBar Logical: If TRUE, display the plotly mode bar. #' @param modeBar_file_format Character: File format for image exports from the mode bar. #' @param filename Character: File name to save the plot. #' @param file_width Numeric: Width of exported image. #' @param file_height Numeric: Height of exported image. #' @param file_scale Numeric: Scale of exported image. #' @param plot_method Character: Plot method to use. Default = "plotly". #' @param theme `Theme` object. #' @param ... Additional arguments to be passed to `heatmaply::heatmaply`. #' #' @return `plotly` object.` #' #' @author EDG #' @export #' #' @examplesIf interactive() #' x <- rnormmat(200, 20) #' xcor <- cor(x) #' draw_heatmap(xcor) draw_heatmap <- function( x, Rowv = TRUE, Colv = TRUE, cluster = FALSE, symm = FALSE, cellnote = NULL, colorgrad_n = 101, colors = NULL, space = "rgb", lo = "#18A3AC", lomid = NULL, mid = NULL, midhi = NULL, hi = "#F48024", k_row = 1, k_col = 1, grid_gap = 0, limits = NULL, margins = NULL, main = NULL, xlab = NULL, ylab = NULL, key_title = NULL, showticklabels = NULL, colorbar_len = .7, plot_method = "plotly", theme = choose_theme(getOption("rtemis_theme")), row_side_colors = NULL, row_side_palette = NULL, col_side_colors = NULL, col_side_palette = NULL, font_size = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) { # Dependencies ---- check_dependencies("heatmaply") # Colnames ---- if (is.null(colnames(x))) { colnames(x) <- seq_len(NCOL(x)) } if (is.null(rownames(x))) { rownames(x) <- seq_len(NROW(x)) } # Margins ---- # By default, allow 7 px per character if (is.null(margins)) { bottom <- max(nchar(colnames(x))) * 7 + 15 left <- max(nchar(rownames(x))) * 7 + 10 margins <- c(bottom, left, 50, 50) } # Tick Labels ---- if (is.null(showticklabels)) { showticklabels <- c( ifelse(NCOL(x) < 50, TRUE, FALSE), ifelse(NROW(x) < 50, TRUE, FALSE) ) } if (is.null(font_size)) { font_size <- 17.0769 - 0.2692 * ncol(x) } # Limits ---- if (is.null(limits)) { maxabs <- max(abs(x), na.rm = TRUE) if (.2 < maxabs && maxabs < 1) { maxabs <- 1 } limits <- c(-maxabs, maxabs) } # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) fg <- plotly::toRGB(theme[["fg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) tick_labels_col <- plotly::toRGB(theme[["tick_labels_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) # Colors ---- if (is.null(mid)) { mid <- theme[["bg"]] } colors <- colorgrad( n = colorgrad_n, colors = colors, space = space, lo = lo, lomid = lomid, mid = mid, midhi = midhi, hi = hi ) # Cluster ---- if (cluster) { Rowv <- Colv <- TRUE } # Cellnote ---- if (!is.null(cellnote)) { if (cellnote == "values") cellnote <- matrix(ddSci(x), NROW(x), NCOL(x)) } # heatmaply ---- ggp2text <- ggplot2::element_text( family = theme[["font_family"]], color = theme[["tick_labels_col"]] ) ggp2theme <- ggplot2::theme( panel.background = ggplot2::element_rect(fill = theme[["bg"]]), plot.background = ggplot2::element_rect(fill = theme[["bg"]]), legend.text = ggplot2::element_text(color = theme[["fg"]]), legend.background = ggplot2::element_rect(fill = theme[["bg"]]), text = ggp2text, title = ggp2text, axis.text = ggp2text, axis.text.x = ggp2text, axis.text.y = ggp2text, axis.title.x = ggp2text, axis.title.y = ggp2text, plot.subtitle = ggp2text, plot.caption = ggp2text ) # Dendrogram ---- if (isTRUE(Rowv)) { Rowv <- x |> dist() |> hclust() |> as.dendrogram() |> dendextend::set("branches_k_color", k = 1) |> dendextend::set("branches_lwd", 1) |> dendextend::set("branches_col", fg) |> dendextend::ladderize() } if (isTRUE(Colv)) { Colv <- x |> t() |> dist() |> hclust() |> as.dendrogram() |> dendextend::set("branches_k_color", k = 1) |> dendextend::set("branches_lwd", 1) |> dendextend::set("branches_col", fg) |> dendextend::ladderize() } plt <- suppressWarnings(heatmaply::heatmaply( x, Rowv = Rowv, Colv = Colv, symm = symm, cellnote = cellnote, colors = colors, grid_gap = grid_gap, limits = limits, margins = margins, key_title = key_title, xlab = xlab, ylab = ylab, # main = main, k_row = k_row, k_col = k_col, plot_method = plot_method, colorbar_len = colorbar_len, showticklabels = showticklabels, heatmap_layers = ggp2theme, row_side_colors = row_side_colors, row_side_palette = row_side_palette, col_side_colors = col_side_colors, col_side_palette = col_side_palette # side_color_layers = ggp2theme, # file = filename )) # Layout ---- # '- layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = font_size, color = tick_labels_col ) .legend <- list( font = list( family = theme[["font_family"]], size = font_size, color = fg ) ) plt <- plotly::layout( plt, yaxis2 = list( title = list( font = f ), # gets assigned to dendrogram titlefont = f, tickcolor = bg, showline = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickfont = tickfont ), xaxis = list( title = list( font = f ), titlefont = f, tickcolor = bg, showline = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickfont = tickfont ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg, legend = .legend ) # Manual theme colors ## y axis tick label colors # plt[["x"]][["layoutAttrs"]][[2]][["yaxis2"]][["tickfont"]][["color"]] ## x axis tick label colors # plt[["x"]][["layoutAttrs"]][[2]][["xaxis"]][["tickfont"]][["color"]] <- "rgba(255, 0, 0, 1)" ## edge lines must be invisible plt[["x"]][["layout"]][["yaxis"]][["linecolor"]] <- plt[["x"]][["layout"]][[ "xaxis2" ]][["linecolor"]] <- theme[["bg"]] # Manual layout ---- # Set padding plt[["sizingPolicy"]][["padding"]] <- padding # Config ---- plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) ## Override colorbar tick font color to theme[["fg"]] plt[["x"]][["data"]][[3]][["colorbar"]][["tickfont"]] <- list( family = theme[["font_family"]], color = tick_labels_col ) plt[["x"]][["data"]][[3]][["colorbar"]][["tickcolor"]] <- theme[["tick_col"]] # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_heatmap ================================================ FILE: R/draw_leaflet.R ================================================ # draw_leaflet.R # ::rtemis:: # 2020 EDG rtemis.org #' Plot interactive choropleth map using \pkg{leaflet} #' #' @param fips Character vector: FIPS codes. (If numeric, it will be appropriately zero-padded). #' @param values Values to map to `fips`. #' @param names Character vector: Optional county names to appear on hover along `values`. #' @param fillOpacity Float: Opacity for fill colors. #' @param color_mapping Character: "Numeric" or "Bin". #' @param col_lo Overlay color mapped to lowest value. #' @param col_hi Overlay color mapped to highest value. #' @param col_na Color mapped to NA values. #' @param col_highlight Hover border color. #' @param col_interpolate Character: "linear" or "spline". #' @param col_bins Integer: Number of color bins to create if `color_mapping = "Bin"`. #' @param domain Limits for mapping colors to values. Default = NULL and set to range. #' @param weight Float: Weight of county border lines. #' @param color Color of county border lines. #' @param alpha Float: Overlay transparency. #' @param bg_tile_provider Background tile (below overlay colors), one of `leaflet::providers`. #' @param bg_tile_alpha Float: Background tile transparency. #' @param fg_tile_provider Foreground tile (above overlay colors), one of `leaflet::providers`. #' @param legend_position Character: One of: "topright", "bottomright", "bottomleft", "topleft". #' @param legend_alpha Float: Legend box transparency. #' @param legend_title Character: Defaults to name of `values` variable. #' @param init_lng Float: Center map around this longitude (in decimal form). Default = -98.54180833333334 (US geographic center). #' @param init_lat Float: Center map around this latitude (in decimal form). Default = 39.207413888888894 (US geographic center). #' @param init_zoom Integer: Initial zoom level (depends on device, i.e. window, size). #' @param stroke Logical: If TRUE, draw polygon borders. #' #' @return `leaflet` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' fips <- c(06075, 42101) #' population <- c(874961, 1579000) #' names <- c("SF", "Philly") #' draw_leaflet(fips, population, names) # NA in legend issue: https://github.com/rstudio/leaflet/issues/615 draw_leaflet <- function( fips, values, names = NULL, fillOpacity = 1, color_mapping = c("Numeric", "Bin"), col_lo = "#0290EE", col_hi = "#FE4AA3", col_na = "#303030", col_highlight = "#FE8A4F", col_interpolate = c("linear", "spline"), col_bins = 21, # for color_mapping Bin domain = NULL, weight = .5, color = "black", alpha = 1, bg_tile_provider = leaflet::providers[["CartoDB.Positron"]], bg_tile_alpha = .67, fg_tile_provider = leaflet::providers[["CartoDB.PositronOnlyLabels"]], legend_position = c( "topright", "bottomright", "bottomleft", "topleft" ), legend_alpha = .8, legend_title = NULL, init_lng = -98.54180833333334, init_lat = 39.207413888888894, init_zoom = 3, stroke = TRUE ) { # Dependencies ---- check_dependencies("leaflet", "geojsonio", "htmltools", "htmlwidgets", "sf") # Arguments ---- vals_name <- deparse(substitute(values)) color_mapping <- match.arg(color_mapping) col_interpolate <- match.arg(col_interpolate) palette <- colorRamp( colors = c(col_lo, col_hi), interpolate = col_interpolate ) legend_position <- match.arg(legend_position) if (is.null(legend_title)) { legend_title <- labelify(vals_name) } # State vs. County data ---- if (max(nchar(fips)) < 3) { geo <- readRDS( system.file( "extdata", "us-states.rds", package = "rtemis" ) ) fips <- if (is.character(fips)) { fips } else { sprintf("%02d", fips) } } else { geo <- readRDS( system.file( "extdata", "us-counties.rds", package = "rtemis" ) ) fips <- if (is.character(fips)) { fips } else { sprintf("%05d", fips) } } # Match input county-level data index <- match(geo[["id"]], fips) geo[["val"]] <- values[index] # Colorscale ---- if (color_mapping == "Numeric") { pal <- leaflet::colorNumeric( palette = palette, domain = domain, na.color = col_na, alpha = TRUE ) } else { pal <- leaflet::colorBin( palette = palette, domain = domain, na.color = col_na, bins = col_bins ) } # Hover labels ---- .labs <- values[index] if (!is.null(names)) { .names <- names[index] labels <- lapply(seq_len(NROW(geo)), function(i) { if (is.na(.labs[i])) { '
N/A
' } else { sprintf("%s
%g", .names[i], .labs[i]) } }) |> lapply(htmltools::HTML) } else { labels <- lapply(seq_len(NROW(geo)), function(i) { if (is.na(.labs[i])) { '
N/A
' } else { sprintf("%g", .labs[i]) } }) |> lapply(htmltools::HTML) } geo[["labels"]] <- labels # leaflet map ---- map <- leaflet::leaflet(geo) |> leaflet::addProviderTiles( provider = bg_tile_provider, options = leaflet::providerTileOptions(opacity = bg_tile_alpha) ) |> leaflet::addMapPane("polygons", zIndex = 410) |> leaflet::addMapPane("tiles", zIndex = 420) |> leaflet::addPolygons( fillColor = ~ pal(val), fillOpacity = fillOpacity, opacity = alpha, weight = weight, color = color, stroke = stroke, group = legend_title, options = leaflet::pathOptions(pane = "polygons"), highlight = leaflet::highlightOptions( weight = 2, color = col_highlight, bringToFront = TRUE ), label = labels, labelOptions = leaflet::labelOptions( style = list("font-weight" = "normal", padding = "2px 2px"), textsize = "15px", direction = "auto" ) ) |> leaflet::addProviderTiles( provider = fg_tile_provider, options = leaflet::pathOptions(pane = "tiles") ) |> leaflet::addLegend( position = legend_position, pal = pal, values = geo[["val"]], opacity = legend_alpha, title = legend_title ) |> leaflet::addLayersControl(overlayGroups = c(legend_title)) |> leaflet::setView(lng = init_lng, lat = init_lat, zoom = init_zoom) insert <- htmltools::tags[["style"]]( type = "text/css", "div.info.legend.leaflet-control br {clear: both;}" ) map <- htmlwidgets::prependContent(map, insert) map } # /rtemis:: draw_leaflet ================================================ FILE: R/draw_pie.R ================================================ # draw_pie.R # ::rtemis:: # 2019 EDG rtemis.org #' Interactive Pie Chart #' #' Draw interactive pie charts using `plotly`. #' #' @param x data.frame: Input: Either a) 1 numeric column with categories defined by rownames, or #' b) two columns, the first is category names, the second numeric or c) a numeric vector with categories defined using #' the `category.names` argument. #' @param main Character: Plot title. Default = NULL, which results in `colnames(x)[1]`. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param alpha Numeric: Alpha for the pie slices. #' @param bg Character: Background color. #' @param plot_bg Character: Plot background color. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param category_names Character, vector, length = NROW(x): Category names. Default = NULL, which uses #' either `rownames(x)`, or the first column of `x` if `ncol(x) = 2`. #' @param textinfo Character: Info to show over each slice: "label", "percent", "label+percent". #' @param font_size Integer: Font size for labels. #' @param labs_col Character: Color of labels. #' @param legend Logical: If TRUE, show legend. #' @param legend_col Character: Color for legend. #' @param sep_col Character: Separator color. #' @param margin List: Margin settings. #' @param padding Numeric: Padding between cells. #' @param displayModeBar Logical: If TRUE, display the plotly mode bar. #' @param modeBar_file_format Character: File format for image exports from the mode bar. #' @param filename Character: File name to save plot. #' @param file_width Integer: Width for saved file. #' @param file_height Integer: Height for saved file. #' @param file_scale Numeric: Scale for saved file. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' draw_pie(VADeaths[, 1, drop = FALSE]) draw_pie <- function( x, main = NULL, xlab = NULL, ylab = NULL, alpha = .8, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), category_names = NULL, textinfo = "label+percent", font_size = 16, labs_col = NULL, legend = TRUE, legend_col = NULL, sep_col = NULL, margin = list(b = 50, l = 50, t = 50, r = 20), padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) { # Dependencies ---- check_dependencies("plotly") # Names ---- .input_name <- deparse(substitute(x)) .rownames <- rownames(x) .colnames <- colnames(x) x <- as.data.frame(x) .cat_names <- category_names if (NCOL(x) == 2) { .cat_names <- as.character(x[, 1]) x <- x[, 2, drop = FALSE] if (is.null(main)) main <- .colnames[2] } if (is.null(.cat_names)) { if (!is.null(.rownames)) { .cat_names <- .rownames } else { .cat_names <- LETTERS[seq_len(NROW(x))] } } if (is.null(main)) { if (!is.null(.colnames)) { main <- labelify(.colnames[1]) } else { main <- labelify(.input_name) } } if (!is.null(main)) { main <- paste0("", main, "") } # Colors ---- p <- NROW(x) col <- recycle(palette, seq_len(p)) # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) if (is.null(legend_col)) { legend_col <- labs_col } sep_col <- if (is.null(sep_col)) bg else plotly::toRGB(sep_col) # plotly ---- plt <- plotly::plot_ly( labels = .cat_names, values = x[, 1], type = "pie", textinfo = textinfo, insidetextfont = list(color = "#FFFFFF"), outsidetextfont = list(color = labs_col), marker = list( colors = unlist(col), line = list(color = sep_col, width = 1) ) ) ## layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) .legend <- list( font = list( family = theme[["font_family"]], size = font_size, color = legend_col ) ) plt <- plotly::layout( plt, yaxis = list( title = ylab, showline = FALSE, titlefont = f, showgrid = FALSE, zeroline = FALSE ), xaxis = list( title = xlab, showline = FALSE, titlefont = f, showgrid = FALSE, zeroline = FALSE ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ) ), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, showlegend = legend, legend = .legend ) # Padding plt[["sizingPolicy"]][["padding"]] <- padding # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_pie.R ================================================ FILE: R/draw_protein.R ================================================ # draw_protein # ::rtemis:: # 2022- EDG rtemis.org #' Plot an amino acid sequence with annotations #' #' Plot an amino acid sequence with multiple site and/or region annotations. #' #' @param x Character vector: amino acid sequence (1-letter abbreviations) OR #' `a3` object OR Character: path to JSON file OR Character: UniProt accession number. #' @param site Named list of lists with indices of sites. These will be #' highlighted by coloring the border of markers. #' @param region Named list of lists with indices of regions. These will be #' highlighted by coloring the markers and lines of regions using the #' `palette` colors. #' @param ptm List of post-translational modifications. #' @param cleavage_site List of cleavage sites. #' @param variant List of variant information. #' @param disease_variants List of disease variant information. #' @param n_per_row Integer: Number of amino acids to show per row. #' @param main Character: Main title. #' @param main_xy Numeric vector, length 2: x and y coordinates for title. #' e.g. if `main_xref` and `main_yref` are `"paper"`: #' `c(0.055, .975)` is top left, `c(.5, .975)` is top and #' middle. #' @param main_xref Character: xref for title. #' @param main_yref Character: yref for title. #' @param main_xanchor Character: xanchor for title. #' @param main_yanchor Character: yanchor for title. #' @param layout Character: "1curve", "grid": type of layout to use. #' @param show_markers Logical: If TRUE, show amino acid markers. #' @param show_labels Logical: If TRUE, annotate amino acids with elements. #' @param font_size Integer: Font size for labels. #' @param label_col Color for labels. #' @param scatter_mode Character: Mode for scatter plot. #' @param marker_size Integer: Size of markers. #' @param marker_col Color for markers. #' @param marker_alpha Numeric: Alpha for markers. #' @param marker_symbol Character: Symbol for markers. #' @param line_col Color for lines. #' @param line_alpha Numeric: Alpha for lines. #' @param line_width Numeric: Width for lines. #' @param show_full_names Logical: If TRUE, show full names of amino acids. #' @param region_scatter_mode Character: Mode for scatter plot. #' @param region_style Integer: Style for regions. #' @param region_marker_size Integer: Size of region markers. #' @param region_marker_alpha Numeric: Alpha for region markers. #' @param region_marker_symbol Character: Symbol for region markers. #' @param region_line_dash Character: Dash for region lines. #' @param region_line_shape Character: Shape for region lines. #' @param region_line_smoothing Numeric: Smoothing for region lines. #' @param region_line_width Numeric: Width for region lines. #' @param region_line_alpha Numeric: Alpha for region lines. #' @param theme `Theme` object. #' @param region_palette Named list of colors for regions. #' @param region_outline_only Logical: If TRUE, only show outline of regions. #' @param region_outline_pad Numeric: Padding for region outline. #' @param region_pad Numeric: Padding for region. #' @param region_fill_alpha Numeric: Alpha for region fill. #' @param region_fill_shape Character: Shape for region fill. #' @param region_fill_smoothing Numeric: Smoothing for region fill. #' @param bpadcx Numeric: Padding for region border. #' @param bpadcy Numeric: Padding for region border. #' @param site_marker_size Integer: Size of site markers. #' @param site_marker_symbol Character: Symbol for site markers. #' @param site_marker_alpha Numeric: Alpha for site markers. #' @param site_border_width Numeric: Width for site borders. #' @param site_palette Named list of colors for sites. #' @param variant_col Color for variants. #' @param disease_variant_col Color for disease variants. #' @param showlegend_ptm Logical: If TRUE, show legend for PTMs. #' @param ptm_col Named list of colors for PTMs. #' @param ptm_symbol Character: Symbol for PTMs. #' @param ptm_offset Numeric: Offset for PTMs. #' @param ptm_pad Numeric: Padding for PTMs. #' @param ptm_marker_size Integer: Size of PTM markers. #' @param clv_col Color for cleavage site annotations. #' @param clv_symbol Character: Symbol for cleavage site annotations. #' @param clv_offset Numeric: Offset for cleavage site annotations. #' @param clv_pad Numeric: Padding for cleavage site annotations. #' @param clv_marker_size Integer: Size of cleavage site annotation markers. #' @param annotate_position_every Integer: Annotate every nth position. #' @param annotate_position_alpha Numeric: Alpha for position annotations. #' @param annotate_position_ay Numeric: Y offset for position annotations. #' @param position_font_size Integer: Font size for position annotations. #' @param legend_xy Numeric vector, length 2: x and y coordinates for legend. #' @param legend_xanchor Character: xanchor for legend. #' @param legend_yanchor Character: yanchor for legend. #' @param legend_orientation Character: Orientation for legend. #' @param legend_col Color for legend. #' @param legend_bg Color for legend background. #' @param legend_border_col Color for legend border. #' @param legend_borderwidth Numeric: Width for legend border. #' @param legend_group_gap Numeric: Gap between legend groups. #' @param margin List: Margin settings. #' @param showgrid_x Logical: If TRUE, show x grid. #' @param showgrid_y Logical: If TRUE, show y grid. #' @param automargin_x Logical: If TRUE, use automatic margin for x axis. #' @param automargin_y Logical: If TRUE, use automatic margin for y axis. #' @param xaxis_autorange Logical: If TRUE, use automatic range for x axis. #' @param yaxis_autorange Character: If TRUE, use automatic range for y axis. #' @param scaleanchor_y Character: Scale anchor for y axis. #' @param scaleratio_y Numeric: Scale ratio for y axis. #' @param hoverlabel_align Character: Alignment for hover label. #' @param displayModeBar Logical: If TRUE, display mode bar. #' @param modeBar_file_format Character: File format for mode bar. #' @param scrollZoom Logical: If TRUE, enable scroll zoom. #' @param filename Character: File name to save plot. #' @param file_width Integer: Width for saved file. #' @param file_height Integer: Height for saved file. #' @param file_scale Numeric: Scale for saved file. #' @param width Integer: Width for plot. #' @param height Integer: Height for plot. #' @param verbosity Integer: Verbosity level. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examples #' \dontrun{ #' # Reads sequence from UniProt server #' tau <- seqinr::read.fasta("https://rest.uniprot.org/uniprotkb/P10636.fasta", #' seqtype = "AA" #' ) #' draw_protein(as.character(tau[[1]])) #' #' # or directly using the UniProt accession number: #' draw_protein("P10636") #' } draw_protein <- function( x, site = NULL, region = NULL, ptm = NULL, cleavage_site = NULL, variant = NULL, disease_variants = NULL, # label_group = NULL, n_per_row = NULL, main = NULL, main_xy = c(0.055, .975), main_xref = "paper", main_yref = "paper", main_xanchor = "middle", main_yanchor = "top", layout = c("simple", "grid", "1curve", "2curve"), show_markers = TRUE, show_labels = TRUE, font_size = 18, label_col = NULL, scatter_mode = "markers+lines", # AA marker marker_size = 28, marker_col = NULL, # "gray18", marker_alpha = 1, marker_symbol = "circle", # AA line line_col = NULL, # "gray18", line_alpha = 1, line_width = 2, # Hover names show_full_names = TRUE, # regions region_scatter_mode = "markers+lines", region_style = 3, region_marker_size = marker_size, region_marker_alpha = .6, region_marker_symbol = "circle", region_line_dash = "solid", region_line_shape = "line", region_line_smoothing = 1, region_line_width = 1, region_line_alpha = .6, theme = choose_theme(getOption("rtemis_theme")), region_palette = getOption("rtemis_palette", "rtms"), region_outline_only = FALSE, region_outline_pad = 2, # for fake polys region_pad = .35, # for real polys region_fill_alpha = .1666666, region_fill_shape = "line", region_fill_smoothing = 1, bpadcx = .5, bpadcy = .5, # Sites - colored marker border site_marker_size = marker_size, site_marker_symbol = marker_symbol, site_marker_alpha = 1, site_border_width = 1.5, site_palette = getOption("rtemis_palette", "rtms"), # Variants variant_col = "#FA6E1E", # Text groups disease_variant_col = "#E266AE", # "#c982d7" # PTMs showlegend_ptm = TRUE, ptm_col = NULL, ptm_symbol = "circle", ptm_offset = .12, ptm_pad = .35, ptm_marker_size = marker_size / 4.5, # Cleavage sites clv_col = NULL, clv_symbol = "triangle-down", clv_offset = .12, clv_pad = .35, clv_marker_size = marker_size / 4, # Position annotations annotate_position_every = 10, annotate_position_alpha = .5, annotate_position_ay = -.4 * marker_size, position_font_size = font_size - 6, # Legend legend_xy = c(.97, .954), legend_xanchor = "left", legend_yanchor = "top", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, margin = list(b = 0, l = 0, t = 0, r = 0, pad = 0), # Axes showgrid_x = FALSE, showgrid_y = FALSE, automargin_x = TRUE, automargin_y = TRUE, xaxis_autorange = TRUE, yaxis_autorange = "reversed", scaleanchor_y = "x", scaleratio_y = 1, # Layout hoverlabel_align = "left", # config displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, # file out filename = NULL, file_width = 1320, file_height = 990, file_scale = 1, width = NULL, height = NULL, verbosity = 1L ) { # Data ---- if (inherits(x, "A3")) { dat <- x x <- dat[["sequence"]] site <- iflengthy(dat[["annotations"]][["site"]]) region <- iflengthy(dat[["annotations"]][["region"]]) ptm <- iflengthy(dat[["annotations"]][["ptm"]]) cleavage_site <- iflengthy(dat[["annotations"]][["cleavage_site"]]) variant <- iflengthy(dat[["annotations"]][["variant"]]) disease_variants <- iflengthy(dat[["annotations"]][["site"]][[ "disease_associated_variant" ]]) } if (length(x) == 1) { if (grepl(".json$", x)) { dat <- jsonlite::read_json( x, simplifyVector = TRUE, simplifyMatrix = FALSE ) x <- dat[["sequence"]] disease_variants <- dat[["annotations"]][["site"]][[ "disease_associated_variant" ]] site <- dat[["annotations"]][["site"]] region <- dat[["annotations"]][["region"]] ptm <- dat[["annotations"]][["ptm"]] cleavage_site <- dat[["annotations"]][["cleavage_site"]] } else { dat <- uniprot_get(x, verbosity = verbosity) x <- dat[["sequence"]] # if (is.null(main)) main <- dat[["identifier"]] } } x <- toupper(x) position <- seq_along(x) n <- length(x) if (is.null(n_per_row)) { n_per_row <- ceiling(sqrt(n)) } # Arguments ---- layout <- match.arg(layout) # Coordinates ---- if (layout == "grid") { # '- grid ---- # 1:n_per_row, n_per_row:1, till n xs <- rep(c(1:n_per_row, n_per_row:1), length.out = n) nrows <- ceiling(n / n_per_row) ys <- rep(1:nrows, each = n_per_row, length = n) } else if (layout == "1curve") { # '- 1curve ---- xs <- rep(c(1:n_per_row, (n_per_row - 1):2), length.out = n) nrows <- ceiling(1 + (n / n_per_row - 1)) ys <- c( 1, rep(seq(1, nrows * 4, 3), each = n_per_row - 1, length = n - 1) ) # drop the n_per_row, then n_per_row - 1 ys[seq(n_per_row, n, n_per_row - 1)] <- ys[seq(n_per_row, n, n_per_row - 1)] + 1.5 } else if (layout == "simple") { # '- simple ---- # if each point is 1 unit apart, border points must be sqrt(3)/2 away xs <- rep(c(1:n_per_row, (n_per_row - 1):2), length.out = n) nrows <- ceiling(1 + (n / n_per_row)) ys <- c( 1, rep(seq(1, nrows), each = n_per_row - 1, length = n - 1) ) # every n_per_row, move to .5 up and sqrt(3)/2 right, left from previous # Right border ys[seq(n_per_row, n, (2 * n_per_row - 2))] <- ys[seq(n_per_row, n, (2 * n_per_row - 2))] + .5 xs[seq(n_per_row, n, (2 * n_per_row - 2))] <- xs[seq(n_per_row, n, 2 * n_per_row - 2)] - 1 + sqrt(3) / 2 # Left border ys[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] <- ys[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] + .5 xs[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] <- xs[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] + 1 - sqrt(3) / 2 } else if (layout == "2curve") { # '- 2curve ---- xs <- rep(c(1:n_per_row, n_per_row:1), length.out = n) nrows <- ceiling(n / n_per_row) ys <- rep(1:nrows * 3 - 2, each = n_per_row, length = n) ys[seq(n_per_row, n, n_per_row)] <- ys[seq(n_per_row, n, n_per_row)] + 1 ys[seq(n_per_row, n, n_per_row) + 1] <- ys[seq(n_per_row, n, n_per_row)] + 1 } # Theme ---- check_is_S7(theme, Theme) if (is.null(label_col)) { label_col <- theme[["fg"]] } label_col <- recycle(label_col, x) if (is.null(marker_col)) { marker_col <- color_fade(theme[["fg"]], theme[["bg"]], .9) } marker_col <- plotly::toRGB(marker_col, alpha = marker_alpha) if (is.null(line_col)) { line_col <- color_fade(theme[["fg"]], theme[["bg"]], .9) } line_col <- plotly::toRGB(line_col, alpha = marker_alpha) main_col <- plotly::toRGB(theme[["main_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) if (is.null(legend_col)) { legend_col <- labs_col } grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]]) # Palette ---- if (is.character(region_palette)) { region_palette <- get_palette(region_palette) } if (is.character(site_palette)) { site_palette <- get_palette(site_palette) } # Match abbreviations to full names ---- if (show_full_names) { input <- switch(max(nchar(x)), "1" = "1", "3" = "3", "full") if (input == "full") { xnames <- x } else { if (input == "1") { xnames <- factor( x, levels = aa[["Abbreviation1"]], labels = aa[["Name"]] ) |> as.character() } else { xnames <- factor( x, levels = toupper(aa[["Abbreviation3"]]), labels = aa[["Name"]] ) |> as.character() } } } else { xnames <- x } # Variants: overwrite xnames with tooltip info if (!is.null(variant)) { for (i in seq_along(variant)) { varidi <- variant[[i]][["position"]] xnames[varidi] <- paste0( xnames[varidi], "\n\n", list2html(variant[[i]], col = variant_col) ) } } # plotly ---- plt <- plotly::plot_ly( width = width, height = height ) # AA markers and lines ---- aaname <- if (is.null(disease_variants)) { "1° structure" } else { paste0( "1° structure (", "Disease variants)" ) } if (show_markers) { clvtext <- if (!is.null(cleavage_site)) { # Get cleavage sites for each amino acid sapply(position, \(i) { if (i %in% unlist(cleavage_site)) { paste0( "\nCleavage site for:\n", paste0( names(cleavage_site)[sapply(cleavage_site, \(x) i %in% x)], collapse = "\n" ) ) } else { "" } }) } else { NULL } plt <- plt |> plotly::add_trace( x = xs, y = ys, type = "scatter", mode = scatter_mode, marker = list( color = plotly::toRGB(marker_col, alpha = marker_alpha), size = marker_size, symbol = marker_symbol ), line = list( color = plotly::toRGB(line_col, alpha = line_alpha), width = line_width ), text = paste0(position, ": ", xnames, clvtext), name = aaname, # hoverinfo = marker.hoverinfo hoverinfo = "text" ) } # regions ---- if (!is.null(region)) { region_names <- names(region) if (is.null(region_names)) { region_names <- paste("region", seq_along(region)) } if (region_style == 1) { # '- region style 1 ---- # for overlapping sets within each region for (i in seq_along(region)) { for (j in seq_along(region[[i]])) { plt <- plt |> plotly::add_trace( x = xs[region[[i]][[j]]], y = ys[region[[i]][[j]]], type = "scatter", mode = region_scatter_mode, marker = list( color = plotly::toRGB( region_palette[[i]], alpha = region_marker_alpha ), size = region_marker_size, symbol = region_marker_symbol ), line = list( color = plotly::toRGB( region_palette[[i]], alpha = region_line_alpha ), dash = region_line_dash, shape = region_line_shape, smoothing = region_line_smoothing, width = region_line_width ), name = region_names[i], legendgroup = region_names[i], showlegend = j == 1 ) if (region_outline_only) { # simulate rounded selection around AAs # need region_marker_size & line_width > marker_size plt <- plt |> plotly::add_trace( x = xs[region[[i]][[j]]], y = ys[region[[i]][[j]]], type = "scatter", mode = region_scatter_mode, marker = list( color = plotly::toRGB( # marker_col, theme[["bg"]], alpha = marker_alpha ), size = region_marker_size - region_outline_pad, symbol = region_marker_symbol ), line = list( color = plotly::toRGB( # line_col, theme[["bg"]], alpha = line_alpha ), shape = region_line_shape, smoothing = region_line_smoothing, width = region_line_width - region_outline_pad ), name = NULL, legendgroup = region_names[i], showlegend = FALSE ) plt <- plt |> plotly::add_trace( x = xs[region[[i]][[j]]], y = ys[region[[i]][[j]]], type = "scatter", mode = scatter_mode, marker = list( color = plotly::toRGB(marker_col, alpha = marker_alpha), size = marker_size, symbol = marker_symbol ), line = list( color = plotly::toRGB(line_col, alpha = line_alpha), width = line_width ), name = NULL, legendgroup = region_names[i], showlegend = FALSE ) } } } } else if (region_style == 2) { # '- region style 2 ---- # for non-overlapping sets within each region for (i in seq_along(region)) { plt <- plt |> plotly::add_trace( x = xs[unlist(region[[i]])], y = ys[unlist(region[[i]])], type = "scatter", mode = "markers", marker = list( color = plotly::toRGB( region_palette[[i]], alpha = region_marker_alpha ), size = region_marker_size, symbol = region_marker_symbol ), name = region_names[i] ) } } else { # '- region style 3 ---- # for 1curve only # region polys: get marker direction and location: # left, leftborder, right, rightborder dl <- c( "r", rep(c("r", "l"), each = n_per_row - 1, length = n - 1) ) dl[seq(n_per_row, n, n_per_row - 1)] <- paste0(dl[seq(n_per_row, n, n_per_row - 1)], "b") # i: IDI of region group for (i in seq_along(region)) { # each region's directions region_dl <- lapply(seq_along(region[[i]]), \(j) { dl[region[[i]][[j]]] }) region_poly_xy <- lapply(seq_along(region[[i]]), \(j) { poly_xys( xs = xs[region[[i]][[j]]], ys = ys[region[[i]][[j]]], d = region_dl[[j]], pad = region_pad, bpadcx = bpadcx, bpadcy = bpadcy ) }) for (j in seq_along(region[[i]])) { plt <- plt |> plotly::add_polygons( x = region_poly_xy[[j]][["px"]], y = region_poly_xy[[j]][["py"]], line = list( color = region_palette[[i]], width = region_line_width, shape = region_fill_shape, smoothing = region_fill_smoothing ), fillcolor = plotly::toRGB( region_palette[[i]], alpha = region_fill_alpha ), name = region_names[i], legendgroup = region_names[i], showlegend = j == 1 ) } } # each region's individual regions' coords } } # /regions # Sites ---- if (!is.null(site)) { site_names <- names(site) if (is.null(site_names)) { site_names <- paste("Site", seq_along(site)) } # for overlapping sets within each region for (i in seq_along(site)) { for (j in seq_along(site[[i]])) { plt <- plt |> plotly::add_trace( x = xs[site[[i]][[j]]], y = ys[site[[i]][[j]]], type = "scatter", mode = "markers", marker = list( color = plotly::toRGB( "#000000", alpha = 0 ), size = site_marker_size, symbol = site_marker_symbol, line = list( color = plotly::toRGB( site_palette[[i]], alpha = site_marker_alpha ), width = site_border_width ) ), name = site_names[i], legendgroup = site_names[i], showlegend = j == 1 ) } } } # /sites # PTMs ---- # Note: Do not show both PTMs and cleavage sites using the same padding if (!is.null(ptm)) { if (verbosity > 1L) { msg_info("Adding PTM markers...") } if (is.null(ptm_col)) { ptm_col <- 1 + seq_along(ptm) } ptm_symbol <- recycle(ptm_symbol, ptm) ptm_names <- names(ptm) for (i in seq_along(ptm)) { polyoffset <- npad(i, n = length(ptm), pad = ptm_pad) plt <- plt |> plotly::add_trace( x = xs[ptm[[i]]] + polyoffset[1], y = ys[ptm[[i]]] + polyoffset[2], type = "scatter", mode = "markers", marker = list( color = plotly::toRGB(ptm_col[[i]]), size = ptm_marker_size, symbol = ptm_symbol[i] ), name = ptm_names[i], showlegend = showlegend_ptm ) } } # Cleavage sites ---- # Note: Do not show both PTMs and cleavage sites using the same padding if (!is.null(cleavage_site)) { if (verbosity > 1L) { msg_info("Adding cleavage site markers...") } if (is.null(clv_col)) { clv_col <- c( colorspace::qualitative_hcl( (length(cleavage_site)), h = c(40, 360), c = 120, l = 50 ) ) } clv_symbol <- recycle(clv_symbol, cleavage_site) clv_names <- names(cleavage_site) for (i in seq_along(cleavage_site)) { polyoffset <- npad(i, n = length(cleavage_site), pad = clv_pad) plt <- plt |> plotly::add_trace( x = xs[cleavage_site[[i]]] + polyoffset[1], y = ys[cleavage_site[[i]]] + polyoffset[2], type = "scatter", mode = "markers", marker = list( color = plotly::toRGB(clv_col[[i]]), size = clv_marker_size, symbol = clv_symbol[i] ), name = clv_names[i], showlegend = showlegend_ptm ) } } # AA labels ---- if (show_labels) { # Variants if (!is.null(variant)) { variant_idi <- sapply(variant, \(v) v[["position"]]) label_col[variant_idi] <- variant_col } # Disease variants if (!is.null(disease_variants)) { label_col[disease_variants] <- disease_variant_col } label_group <- factor(label_col) label_group_col <- levels(label_group) for (i in seq_along(label_group_col)) { idx <- label_group == label_group_col[i] plt <- plt |> plotly::add_annotations( xref = "x", yref = "y", x = xs[idx], y = ys[idx], text = x[idx], font = list( family = theme[["font_family"]], size = font_size, color = label_group_col[[i]] ), showarrow = FALSE # name = label_group.levels[[i]], # showlegend = nchar(label_group.levels[[i]]) > 0 ) } # } } # Position annotations ---- if ( !is.null(annotate_position_every) && length(x) > annotate_position_every ) { idxpos <- seq(annotate_position_every, n, annotate_position_every) plt <- plt |> plotly::add_annotations( x = xs[idxpos], y = ys[idxpos], xref = "x", yref = "y", xanchor = "middle", yanchor = "bottom", ax = 0, ay = annotate_position_ay, text = idxpos, showarrow = TRUE, arrowcolor = "#ffffff00", font = list( size = position_font_size, family = theme[["font_family"]], color = plotly::toRGB(theme[["fg"]], alpha = annotate_position_alpha) ) ) } # Layout ---- .legend <- list( x = legend_xy[1], xanchor = legend_xanchor, y = legend_xy[2], yanchor = legend_yanchor, font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation, bgcolor = plotly::toRGB(legend_bg), bordercolor = plotly::toRGB(legend_border_col), borderwidth = legend_borderwidth, tracegroupgap = legend_group_gap ) plt <- plotly::layout( plt, xaxis = list( autorange = xaxis_autorange, showgrid = showgrid_x, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], zeroline = FALSE, showticklabels = FALSE, automargin = automargin_x ), yaxis = list( autorange = yaxis_autorange, showgrid = showgrid_y, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], zeroline = FALSE, showticklabels = FALSE, automargin = automargin_y, scaleanchor = scaleanchor_y, scaleratio = scaleratio_y ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = main_xref, yref = main_yref, xanchor = main_xanchor, yanchor = main_yanchor, x = main_xy[1], y = main_xy[2] ), paper_bgcolor = theme[["bg"]], plot_bgcolor = theme[["plot_bg"]], margin = margin, legend = .legend, hoverlabel = list( align = hoverlabel_align ) ) # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ), scrollZoom = TRUE ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly return(plt) } # /rtemis::draw_protein aa <- data.frame( Abbreviation1 = c( "A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V", "B", "Z", "X", "" ), Abbreviation3 = c( "Ala", "Arg", "Asn", "Asp", "Cys", "Gln", "Glu", "Gly", "His", "Ile", "Leu", "Lys", "Met", "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val", "Asx", "Glx", "Xaa", "TERM" ), Name = c( "Alanine", "Arginine", "Asparagine", "Aspartate", "Cysteine", "Glutamine", "Glutamate", "Glycine", "Histidine", "Isoleucine", "Leucine", "Lysine", "Methionine", "Phenylalanine", "Proline", "Serine", "Threonine", "Tryptophan", "Tyrosine", "Valine", "Aspartic acid or Asparagine", "Glutamine or Glutamic acid", "(Any)", "Termination codon" ) ) poly_xys <- function(xs, ys, d, pad = 1, bpadcx = .5, bpadcy = .5) { n <- length(xs) dk <- rep(1, n) kinks <- which("rb" == d | "lb" == d) for (i in kinks) { if ((i + 1) <= n) { dk[(i + 1):n] <- -dk[(i + 1):n] } } # première ---- px_1 <- switch( d[1], "r" = xs[1] - pad, "l" = xs[1] + pad, "rb" = c(xs[1] - pad, xs[1]), "lb" = c(xs[1] + pad, xs[1]) ) py_1 <- switch( d[1], "rb" = rep(ys[1] - pad, 2), "lb" = rep(ys[1] - pad, 2), ys[1] - pad ) # aller ---- # k: IDI of individual amino acid within individual region px_aller <- sapply(seq_along(d), \(k) { if (d[k] == "rb") { # rep(xs[k] + sqrt(.5 * pad^2), 2) rep(xs[k] + pad, 2) } else if (d[k] == "lb") { # rep(xs[k] - sqrt(.5 * pad^2), 2) rep(xs[k] - pad, 2) } else { xs[k] } }) |> unlist() py_aller <- sapply(seq_along(d), \(k) { if (d[[k]] %in% c("l", "r")) { if (dk[k] == -1) { ys[k] + pad } else { ys[k] - pad } } else { if (k == 1) { c(ys[k] - pad, ys[k] + sqrt(.5 * pad^2)) } else if (k == length(d)) { c(ys[k] - sqrt(.5 * pad^2), ys[k] + pad) } else { c(ys[k] - sqrt(.5 * pad^2), ys[k] + sqrt(.5 * pad^2)) } } }) |> unlist() # centre ---- dr <- rev(d) dkr <- rev(dk) xsr <- rev(xs) ysr <- rev(ys) px_centre <- switch( dr[1], "r" = rep(xsr[1] + pad, 2), "l" = rep(xsr[1] - pad, 2), # "rb" = c(xsr[1], xsr[1] - sqrt(.5 * pad^2)), # "lb" = c(xsr[1], xsr[1] + sqrt(.5 * pad^2)) "rb" = c(xsr[1], xsr[1] - pad), "lb" = c(xsr[1], xsr[1] + pad) ) py_centre <- if (dr[1] %in% c("r", "l")) { if (length(kinks) > 0) { c(ysr[1] + pad, ysr[1] - pad) } else { c(ysr[1] - pad, ysr[1] + pad) } } else { rep(ysr[1] + pad, 2) } # retour ---- px_retour <- sapply(seq_along(dr), \(k) { if (dr[k] == "rb") { if (k == 1 | k == length(dr)) { rep(xsr[k] - pad, 2) } else { rep(xsr[k] - 1.5 * sqrt(.5 * pad^2), 2) } } else if (dr[k] == "lb") { if (k == 1 | k == length(dr)) { rep(xsr[k] + pad, 2) } else { rep(xsr[k] + 1.5 * sqrt(.5 * pad^2), 2) } } else { xsr[k] } }) |> unlist() py_retour <- sapply(seq_along(dr), \(k) { if (dr[[k]] %in% c("l", "r")) { if (dkr[k] == -1) { ysr[k] - pad } else { ysr[k] + pad } } else { rep(ysr[k], 2) } }) |> unlist() # find point before and after rb/lb idirb <- which(d == "rb") if (length(idirb) > 0) { if (idirb > 1) { px_aller[idirb - 1] <- px_aller[idirb - 1] + sqrt(.5 * pad^2) } if ((idirb + 1) <= length(d)) { px_aller[idirb + 2] <- px_aller[idirb + 2] + sqrt(.5 * pad^2) } } idilb <- which(d == "lb") if (length(idilb) > 0) { if (idilb > 1) { px_aller[idilb - 1] <- px_aller[idilb - 1] - sqrt(.5 * pad^2) } if ((idilb + 1) <= length(d)) { px_aller[idilb + 2] <- px_aller[idilb + 2] - sqrt(.5 * pad^2) } } # pénultième ---- py_pen <- if (d[1] %in% c("rb", "lb")) { ys[1] - sqrt(.5 * pad^2) } else { ys[1] + pad } # out ---- list( px = c(px_1, px_aller, px_centre, px_retour, px_1[1], px_1[1]), py = c(py_1, py_aller, py_centre, py_retour, py_pen, py_1[1]) ) } qrtpad <- function(i, pad = .3) { qrt <- sqrt(.5 * pad^2) switch( i, `1` = c(qrt, -qrt), `2` = c(pad, 0), `3` = c(qrt, qrt), `4` = c(0, pad), `5` = c(-qrt, qrt), `6` = c(-pad, 0), `7` = c(-qrt, -qrt) ) } # npad: function to calculate circular offset of a point from the center of a region # by dividing circle into n equal parts, beginning from the top npad <- function(i, n = 12, pad = .3) { angle <- 2 * pi / n x <- sin(angle * i) * pad y <- cos(angle * i) * pad c(x, y) } ================================================ FILE: R/draw_pvals.R ================================================ # draw_pvals.R # ::rtemis:: # 2021 EDG rtemis.org #' Barplot p-values using [draw_bar] #' #' Plot 1 - p-values as a barplot #' #' @param x Float, vector: p-values. #' @param xnames Character, vector: feature names. #' @param yname Character: outcome name. #' @param p_adjust_method Character: method for [p.adjust]. #' @param pval_hline Float: Significance level at which to plot horizontal line. #' @param hline_col Color for `pval_hline`. #' @param hline_dash Character: type of line to draw. #' @param ... Additional arguments passed to [draw_bar]. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' draw_pvals(c(0.01, 0.02, 0.03), xnames = c("Feature1", "Feature2", "Feature3")) draw_pvals <- function( x, xnames = NULL, yname = NULL, p_adjust_method = "none", pval_hline = .05, hline_col = rt_red, hline_dash = "dash", ... ) { if (is.null(xnames)) { xnames <- names(x) } if (is.null(yname)) { yname <- deparse(substitute(x)) } draw_bar( 1 - p.adjust(x, method = p_adjust_method), group_names = xnames, legend = FALSE, ylab = if (p_adjust_method == "none") { "1 - p-value" } else { paste0("1 - ", p_adjust_method, "-adjusted p-value") }, hline = 1 - pval_hline, hline_col = hline_col, hline_dash = hline_dash, ... ) } # /rtemis::draw_pvals ================================================ FILE: R/draw_roc.R ================================================ # draw_roc.R # ::rtemis:: # 2025 EDG rtemis.org #' Draw ROC curve #' #' @param true_labels Factor: True outcome labels. #' @param predicted_prob Numeric vector \[0, 1\]: Predicted probabilities for the positive class (i.e. second level of outcome). #' Or, for multiclass, a matrix of predicted probabilities with one column per class. #' Or, a list of such vectors/matrices to draw multiple ROC curves on the same plot. #' @param multiclass_fill_labels Logical: If TRUE, fill in labels for multiclass ROC curves. #' If FALSE, column names of `predicted_prob` must match levels of `true_labels`. #' @param main Character: Main title for the plot. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param legend Logical: If TRUE, draw legend. #' @param legend_title Character: Title for the legend. #' @param legend_xy Numeric vector: Position of the legend in the form c(x, y). #' @param legend_xanchor Character: X anchor for the legend. #' @param legend_yanchor Character: Y anchor for the legend. #' @param auc_dp Integer: Number of decimal places for AUC values. #' @param xlim Numeric vector: Limits for the x-axis. #' @param ylim Numeric vector: Limits for the y-axis. #' @param diagonal Logical: If TRUE, draw diagonal line. #' @param diagonal_col Character: Color for the diagonal line. #' @param axes_square Logical: If TRUE, make axes square. #' @param filename Character: If provided, save the plot to this file. #' @param ... Additional arguments passed to [draw_scatter]. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Binary classification #' true_labels <- factor(c("A", "B", "A", "A", "B", "A", "B", "B", "A", "B")) #' predicted_prob <- c(0.1, 0.4, 0.35, 0.8, 0.65, 0.2, 0.9, 0.55, 0.3, 0.7) #' draw_roc(true_labels, predicted_prob) draw_roc <- function( true_labels, predicted_prob, multiclass_fill_labels = TRUE, main = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), legend = TRUE, legend_title = "Group (AUC)", legend_xy = c(1, 0), legend_xanchor = "right", legend_yanchor = "bottom", auc_dp = 3L, xlim = c(-0.05, 1.05), ylim = c(-0.05, 1.05), diagonal = TRUE, diagonal_col = NULL, axes_square = TRUE, filename = NULL, ... ) { # List of probabilities probl <- if (!is.list(predicted_prob)) { list(predicted_prob) } else { predicted_prob } labelsl <- if (!is.list(true_labels)) { list(true_labels) } else { true_labels } # Check N sets if (length(probl) != length(labelsl)) { cli::cli_abort( "You must have the same N of sets of `predicted_prob` and `true_labels`." ) } # Binary vs. Multiclass # Determine number of classes from number of columns in predicted_prob # If ncol is NULL, it is binary classification n_classes <- unique(sapply(probl, \(x) { if (is.null(ncol(x))) { 2L } else { ncol(x) } })) if (length(n_classes) > 1) { cli::cli_abort( "You must have the same number of classes in each set of `predicted_prob`." ) } # Check lengths of corresponding sets # NROW() works for both vectors and matrices for (i in seq_along(probl)) { if (NROW(probl[[i]]) != length(labelsl[[i]])) { cli::cli_abort( "You must have the same N of `predicted_prob` and `true_labels`." ) } } if (n_classes == 2L) { .roc <- lapply(seq_along(probl), \(i) { pROC::roc( response = labelsl[[i]], predictor = probl[[i]], levels = levels(labelsl[[i]]), direction = "<" ) }) } else { .roc <- lapply(seq_along(probl), \(i) { pred <- probl[[i]] if (is.null(colnames(pred))) { if (multiclass_fill_labels) { colnames(pred) <- levels(labelsl[[i]]) } else { cli::cli_abort( "For multiclass, `predicted_prob` must have column names matching levels of `true_labels`." ) } } pROC::multiclass.roc( response = labelsl[[i]], predictor = pred, levels = levels(labelsl[[i]]) ) }) } .names <- names(probl) if (n_classes == 2L) { TPR <- lapply(.roc, \(r) r[["sensitivities"]]) FPR <- lapply(.roc, \(r) 1 - r[["specificities"]]) AUC <- lapply(.roc, \(r) r[["auc"]]) } else { TPR <- lapply(.roc, \(r) r[["rocs"]][[1]][["sensitivities"]]) FPR <- lapply(.roc, \(r) 1 - r[["rocs"]][[1]][["specificities"]]) AUC <- lapply(.roc, \(r) r[["auc"]]) } names(TPR) <- names(FPR) <- names(AUC) <- .names theme@config[["zerolines"]] <- FALSE draw_scatter( x = FPR, y = TPR, xlab = "False Positive Rate", ylab = "True Positive Rate", main = main, theme = theme, palette = palette, mode = "lines", group_names = paste0(.names, " (", ddSci(unlist(AUC), auc_dp), ")"), legend = legend, legend_title = legend_title, legend_xy = legend_xy, legend_xanchor = legend_xanchor, legend_yanchor = legend_yanchor, xlim = xlim, ylim = ylim, diagonal = diagonal, diagonal_col = diagonal_col, axes_square = axes_square, order_on_x = FALSE, filename = filename, ... ) } # /rtemis::draw_roc ================================================ FILE: R/draw_scatter.R ================================================ # draw_scatter.R # ::rtemis:: # 2019- EDG rtemis.org #' Interactive Scatter Plots #' #' Draw interactive scatter plots using `plotly`. #' #' @param x Numeric, vector/data.frame/list: x-axis data. If y is NULL and `NCOL(x) > 1`, first two columns used as `x` and `y`, respectively. #' @param y Numeric, vector/data.frame/list: y-axis data. #' @param fit Character: Fit method. #' @param se_fit Logical: If TRUE, include standard error of the fit. #' @param se_times Numeric: Multiplier for standard error. #' @param include_fit_name Logical: If TRUE, include fit name in legend. #' @param cluster Character: Clustering method. #' @param cluster_config List: Config for clustering. #' @param group Factor: Grouping variable. # @param formula Formula: Formula for non-linear least squares fit. #' @param rsq Logical: If TRUE, print R-squared values in legend if `fit` is set. #' @param mode Character, vector: "markers", "lines", "markers+lines". #' @param order_on_x Logical: If TRUE, order `x` and `y` on `x`. #' @param main Character: Main title. #' @param subtitle Character: Subtitle. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param alpha Numeric: Alpha for markers. #' @param theme `Theme` object. #' @param palette Character vector: Colors to use. #' @param axes_square Logical: If TRUE, draw a square plot. #' @param group_names Character: Names for groups. #' @param font_size Numeric: Font size. #' @param marker_col Color for markers. #' @param marker_size Numeric: Marker size. #' @param symbol Character: Marker symbol. #' @param fit_col Color for fit line. #' @param fit_alpha Numeric: Alpha for fit line. #' @param fit_lwd Numeric: Line width for fit line. #' @param line_shape Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv". #' @param se_col Color for standard error band. #' @param se_alpha Numeric: Alpha for standard error band. #' @param scatter_type Character: Scatter plot type. #' @param show_marginal_x Logical: If TRUE, add marginal distribution line markers on x-axis. #' @param show_marginal_y Logical: If TRUE, add marginal distribution line markers on y-axis. #' @param marginal_x Numeric: Data for marginal distribution on x-axis. #' @param marginal_y Numeric: Data for marginal distribution on y-axis. #' @param marginal_x_y Numeric: Y position of marginal markers on x-axis. #' @param marginal_y_x Numeric: X position of marginal markers on y-axis. #' @param marginal_col Color for marginal markers. #' @param marginal_alpha Numeric: Alpha for marginal markers. #' @param marginal_size Numeric: Size of marginal markers. #' @param legend Logical: If TRUE, draw legend. #' @param legend_title Character: Title for legend. #' @param legend_trace Logical: If TRUE, draw legend trace. (For when you have `fit` and don't want a trace for the markers.) #' @param legend_xy Numeric: Position of legend. #' @param legend_xanchor Character: X anchor for legend. #' @param legend_yanchor Character: Y anchor for legend. #' @param legend_orientation Character: Orientation of legend. #' @param legend_col Color for legend text. #' @param legend_bg Color for legend background. #' @param legend_border_col Color for legend border. #' @param legend_borderwidth Numeric: Border width for legend. #' @param legend_group_gap Numeric: Gap between legend groups. #' @param x_showspikes Logical: If TRUE, show spikes on x-axis. #' @param y_showspikes Logical: If TRUE, show spikes on y-axis. #' @param spikedash Character: Dash type for spikes. #' @param spikemode Character: Spike mode. #' @param spikesnap Character: Spike snap mode. #' @param spikecolor Color for spikes. #' @param spikethickness Numeric: Thickness of spikes. #' @param margin List: Plot margins. #' @param main_y Numeric: Y position of main title. #' @param main_yanchor Character: Y anchor for main title. #' @param subtitle_x Numeric: X position of subtitle. #' @param subtitle_y Numeric: Y position of subtitle. #' @param subtitle_xref Character: X reference for subtitle. #' @param subtitle_yref Character: Y reference for subtitle. #' @param subtitle_xanchor Character: X anchor for subtitle. #' @param subtitle_yanchor Character: Y anchor for subtitle. #' @param automargin_x Logical: If TRUE, automatically adjust x-axis margins. #' @param automargin_y Logical: If TRUE, automatically adjust y-axis margins. #' @param xlim Numeric: Limits for x-axis. #' @param ylim Numeric: Limits for y-axis. #' @param axes_equal Logical: If TRUE, set equal scaling for axes. #' @param diagonal Logical: If TRUE, add diagonal line. #' @param diagonal_col Color for diagonal line. #' @param diagonal_dash Character: "solid", "dash", "dot", "dashdot", "longdash", "longdashdot". Dash type for diagonal line. #' @param diagonal_alpha Numeric: Alpha for diagonal line. #' @param fit_params `Hyperparameters` for fit. #' @param vline Numeric: X position for vertical line. #' @param vline_col Color for vertical line. #' @param vline_width Numeric: Width for vertical line. #' @param vline_dash Character: Dash type for vertical line. #' @param hline Numeric: Y position for horizontal line. #' @param hline_col Color for horizontal line. #' @param hline_width Numeric: Width for horizontal line. #' @param hline_dash Character: Dash type for horizontal line. #' @param hovertext List: Hover text for markers. #' @param width Numeric: Width of plot. #' @param height Numeric: Height of plot. #' @param displayModeBar Logical: If TRUE, display mode bar. #' @param modeBar_file_format Character: File format for mode bar. #' @param scrollZoom Logical: If TRUE, enable scroll zoom. #' @param filename Character: Filename to save plot. #' @param file_width Numeric: Width of saved file. #' @param file_height Numeric: Height of saved file. #' @param file_scale Numeric: Scale of saved file. #' @param verbosity Integer: Verbosity level. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' draw_scatter(iris$Sepal.Length, iris$Petal.Length, #' fit = "gam", se_fit = TRUE, group = iris$Species #' ) draw_scatter <- function( x, y = NULL, fit = NULL, se_fit = FALSE, se_times = 1.96, include_fit_name = TRUE, cluster = NULL, cluster_config = list(k = 2), group = NULL, # formula = NULL, rsq = TRUE, mode = "markers", order_on_x = NULL, main = NULL, subtitle = NULL, xlab = NULL, ylab = NULL, alpha = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), axes_square = FALSE, group_names = NULL, font_size = 16, marker_col = NULL, marker_size = 8, symbol = "circle", fit_col = NULL, fit_alpha = .8, fit_lwd = 2.5, line_shape = "linear", se_col = NULL, se_alpha = .4, scatter_type = "scatter", show_marginal_x = FALSE, show_marginal_y = FALSE, marginal_x = x, marginal_y = y, marginal_x_y = NULL, marginal_y_x = NULL, marginal_col = NULL, marginal_alpha = .333, marginal_size = 10, legend = NULL, legend_title = NULL, legend_trace = TRUE, legend_xy = c(0, .98), legend_xanchor = "left", legend_yanchor = "auto", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, x_showspikes = FALSE, y_showspikes = FALSE, spikedash = "solid", spikemode = "across", spikesnap = "hovered data", spikecolor = NULL, spikethickness = 1, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), main_y = 1.01, main_yanchor = "bottom", subtitle_x = 0.02, subtitle_y = 0.99, subtitle_xref = "paper", subtitle_yref = "paper", subtitle_xanchor = "left", subtitle_yanchor = "top", automargin_x = TRUE, automargin_y = TRUE, xlim = NULL, ylim = NULL, axes_equal = FALSE, diagonal = FALSE, diagonal_col = NULL, diagonal_dash = "dot", diagonal_alpha = .66, fit_params = NULL, vline = NULL, vline_col = theme[["fg"]], vline_width = 1, vline_dash = "dot", hline = NULL, hline_col = theme[["fg"]], hline_width = 1, hline_dash = "dot", hovertext = NULL, width = NULL, height = NULL, displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 0L ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- xname <- labelify(gsub(".*\\$", "", deparse(substitute(x)))) yname <- labelify(gsub(".*\\$", "", deparse(substitute(y)))) if (is.null(y) && NCOL(x) > 1) { if (is.null(xlab)) { xlab <- labelify(colnames(x)[1]) } if (is.null(ylab)) { ylab <- labelify(colnames(x)[2]) } y <- x[, 2] x <- x[, 1] } if (!is.null(fit)) { if (fit == "none") fit <- NULL } # easier to work with shiny if (is.logical(fit)) { if (fit) fit <- "GAM" } if (is.null(fit)) { se_fit <- FALSE } if (!is.null(fit)) { fit <- toupper(fit) } if (!is.null(main)) { main <- paste0("", main, "") } .mode <- mode .names <- group_names check_is_S7(theme, Theme) if (se_fit) { if (!fit %in% c("GLM", "LM", "LOESS", "GAM", "NW")) { warning(paste( "Standard error of the fit not available for", fit, "- try LM, LOESS, GAM, or NW" )) se_fit <- FALSE } } # order_on_x ---- if (is.null(order_on_x)) { order_on_x <- if (!is.null(fit) || any(grepl("lines", mode))) { TRUE } else { FALSE } } # Cluster ---- if (!is.null(cluster)) { group <- suppressWarnings( cluster( x = data.frame(x, y), algorithm = cluster, config = do_call( get_clust_setup_fn(cluster), cluster_config ) )@clusters ) group <- paste("Cluster", group) } # Data ---- # xlab, ylab ---- # The gsubs remove all text up to and including a "$" symbol if present if (is.null(xlab)) { if (is.list(x)) xlab <- "x" else xlab <- xname } if (!is.null(y) && is.null(ylab)) { if (is.list(y)) ylab <- "y" else ylab <- yname } # Group ---- if (!is.null(group)) { group <- as.factor(group) x <- split(x, group, drop = TRUE) y <- split(y, group, drop = TRUE) if (is.null(group_names)) { group_names <- levels(droplevels(group)) } names(x) <- names(y) <- .names <- group_names if (!is.null(hovertext)) hovertext <- split(hovertext, group, drop = TRUE) } # Try to get names from list or data frame inputs if (is.list(y) || NCOL(y) > 1) { if (is.null(.names) && !is.null(names(y))) .names <- names(y) } if (is.list(x) || NCOL(x) > 1) { if (is.null(.names) && !is.null(names(x))) .names <- names(x) } # Data to lists ---- x <- if (!is.list(x)) as.list(as.data.frame(x)) else x y <- if (!is.null(y) && !is.list(y)) as.list(as.data.frame(y)) else y hovertext <- if (!is.null(hovertext) && !is.list(hovertext)) { as.list(as.data.frame(hovertext)) } else { hovertext } if (length(x) == 1 && length(y) > 1) { x <- rep(x, length(y)) .names <- names(y) } if (length(y) == 1 && length(x) > 1) { y <- rep(y, length(x)) .names <- names(x) } if (!is.null(hovertext) && length(hovertext) == 1 && length(x) > 1) { hovertext <- rep(hovertext, length(x)) } n_groups <- length(x) if (is.null(legend)) { legend <- if (n_groups == 1 && is.null(fit)) FALSE else TRUE } if (length(.mode) < n_groups) { .mode <- c(.mode, rep(tail(.mode)[1], n_groups - length(.mode))) } # if (is.null(legend)) legend <- n_groups > 1 if (is.null(.names)) { if (n_groups > 1) { .names <- paste("Group", seq_len(n_groups)) } else { # .names <- if (!is.null(fit)) fit else NULL .names <- xname } } # Marginal data ---- if (show_marginal_x && is.null(marginal_x)) { marginal_x <- x } if (show_marginal_y && is.null(marginal_y)) { marginal_y <- y } # Reorder ---- if (order_on_x) { index <- lapply(x, order) x <- lapply(seq(x), \(i) x[[i]][index[[i]]]) y <- lapply(seq(x), \(i) y[[i]][index[[i]]]) if (!is.null(hovertext)) { hovertext <- lapply(seq(x), \(i) hovertext[[i]][index[[i]]]) } } # Colors ---- col <- recycle(palette, seq_len(n_groups)) if (is.null(alpha)) { alpha <- if (mode == "markers") { autoalpha(max(lengths(x))) } else { 1 } } # Theme ---- if (diagonal) { if (is.null(diagonal_col)) { diagonal_col <- theme[["fg"]] } diagonal_col <- adjustcolor(diagonal_col, diagonal_alpha) } bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) if (!theme[["axes_visible"]]) { tick_col <- labs_col <- "transparent" } # marker_col, se_col === if (is.null(marker_col)) { marker_col <- if (!is.null(fit) && n_groups == 1) { as.list(rep(theme[["fg"]], n_groups)) } else { col } } if (!is.null(fit)) { if (is.null(fit_col)) fit_col <- col } if (se_fit && is.null(se_col)) { se_col <- col } if (is.null(legend_col)) { legend_col <- labs_col } if (is.null(spikecolor)) { spikecolor <- theme[["fg"]] } # Size ---- # fitted & se_fit ---- # If plotting se bands, need to include (fitted +/- se_times * se) in the axis limits if (se_fit) { se <- list() } else { se <- NULL } if (!is.null(fit)) { fitted <- list() fitted_text <- character() for (i in seq_len(n_groups)) { mod <- train( x = data.frame(x = x[[i]], y = y[[i]]), algorithm = fit, hyperparameters = fit_params, verbosity = verbosity - 1L ) fitted[[i]] <- fitted(mod) if (se_fit) { se[[i]] <- se(mod) } if (include_fit_name) { # fitted_text[i] <- switch(fit, # NLS = mod$extra$model, # NLA = mod$mod$formula, # fit # ) fitted_text[i] <- fit } else { fitted_text[i] <- "" } if (rsq) { fitted_text[i] <- paste0( fitted_text[i], if (n_groups == 1) " (" else " ", "R2 = ", ddSci(mod@metrics_training[["Rsq"]]), if (n_groups == 1) ")" ) } # if (rsq_pval) { # if (fit %in% c("LM", "GLM")) { # rsqp[[i]] <- paste0(ddSci(mod@metrics_training$Rsq), " (", # ddSci(summary(mod$mod)$coefficients[2, 4]), ")") # } else if (fit == "GAM") { # rsqp[[i]] <- paste0(ddSci(mod@metrics_training$Rsq), " (", # ddSci(summary(mod$mod)$s.pv), ")") # } # } } } # Axes Limits ---- if (axes_equal) { if (is.null(xlim)) { xlim <- getlim(unlist(x), "r", .06) } if (is.null(ylim)) { ylim <- getlim(unlist(y), "r", .06) if (is.list(fitted) && !is.list(se)) { ylim_hi <- max(unlist(fitted)) ylim_lo <- min(unlist(fitted)) ylim <- range(ylim_lo, ylim_hi, y) } if (is.list(se)) { ylim_hi <- max(unlist(lapply( seq_along(fitted), function(i) { as.data.frame(fitted[[i]]) + se_times * as.data.frame(se[[i]]) } ))) ylim_lo <- min(unlist(lapply( seq_along(fitted), function(i) { as.data.frame(fitted[[i]]) - se_times * as.data.frame(se[[i]]) } ))) ylim <- range(ylim_lo, ylim_hi, y) } } xlim <- ylim <- range(xlim, ylim) } # /axes_equal # unlist will coerce Dates to numeric, also don't want padding if (is.null(xlim) && !inherits(x[[1]], "Date")) { xlim <- getlim(unlist(x), "r", .06) } if (is.null(ylim) && !inherits(y[[1]], "Date")) { ylim <- getlim(unlist(y), "r", .06) } # plotly ---- if (!is.null(fit) && rsq) { if (!include_fit_name) { fitted_text <- gsub("^ ", "", fitted_text) } if (n_groups > 1) { .names <- paste0(.names, " (", fitted_text, ")") } } plt <- plotly::plot_ly( width = width, height = height ) if (diagonal) { lo <- min(xlim[1], ylim[1]) hi <- max(xlim[2], ylim[2]) plt <- plotly::layout( plt, shapes = list( type = "line", x0 = lo, x1 = hi, y0 = lo, y1 = hi, line = list( color = diagonal_col, dash = diagonal_dash ) ) ) } for (i in seq_len(n_groups)) { ## { Scatter } ---- marker <- if (grepl("markers", .mode[i])) { list( color = plotly::toRGB(marker_col[[i]], alpha = alpha), size = marker_size, symbol = symbol ) } else { NULL } plt <- plotly::add_trace( plt, x = x[[i]], y = y[[i]], type = scatter_type, mode = .mode[i], # fillcolor = plotly::toRGB(col[[i]], alpha), name = .names[i], # text = .text[[i]], # hoverinfo = "text", text = hovertext[[i]], marker = marker, line = if (grepl("lines", .mode[i])) { list( color = plotly::toRGB(marker_col[[i]], alpha = alpha), shape = line_shape ) } else { NULL }, legendgroup = if (legend_trace) { .names[i] } else { paste0(.names[i], "_marker") }, showlegend = legend && legend_trace ) # Marginal plots ---- # Add marginal plots by plotting short vertical markers on the x and y axes if (show_marginal_x) { if (is.null(marginal_col)) { marginal_col <- plotly::toRGB(marker_col, alpha = marginal_alpha) } if (is.null(marginal_x_y)) { marginal_x_y <- ylim[1] } # Extend ylim to include marginal markers ylim[1] <- ylim[1] - 0.02 * diff(ylim) for (i in seq_len(n_groups)) { plt <- plotly::add_trace( plt, x = marginal_x[[i]], y = rep(marginal_x_y, length(marginal_x[[i]])), type = "scatter", mode = "markers", marker = list( color = marginal_col[[i]], size = marginal_size, symbol = "line-ns-open" ), showlegend = FALSE, hoverinfo = "x" ) } } # /show_marginal_x if (show_marginal_y) { if (is.null(marginal_col)) { marginal_col <- plotly::toRGB(marker_col, alpha = marginal_alpha) } if (is.null(marginal_y_x)) { marginal_y_x <- xlim[1] } # Extend xlim to include marginal markers xlim[1] <- xlim[1] - 0.02 * diff(xlim) for (i in seq_len(n_groups)) { plt <- plotly::add_trace( plt, x = rep(marginal_y_x, length(marginal_y[[i]])), y = marginal_y[[i]], type = "scatter", mode = "markers", marker = list( color = marginal_col[[i]], size = marginal_size, symbol = "line-ew-open" ), showlegend = FALSE, hoverinfo = "y" # legendgroup = .names[i] ) } } # /show_marginal_y ## { SE band } ---- if (se_fit) { plt <- plotly::add_trace( plt, x = x[[i]], y = fitted[[i]] + se_times * se[[i]], type = scatter_type, mode = "lines", line = list(color = "transparent"), legendgroup = .names[i], showlegend = FALSE, hoverinfo = "none", inherit = FALSE ) plt <- plotly::add_trace( plt, x = x[[i]], y = fitted[[i]] - se_times * se[[i]], type = scatter_type, mode = "lines", fill = "tonexty", fillcolor = plotly::toRGB(se_col[[i]], alpha = se_alpha), line = list(color = "transparent"), # name = shade_name, legendgroup = .names[i], showlegend = FALSE, hoverinfo = "none", inherit = FALSE ) } if (!is.null(fit)) { ## { Fitted line } ---- lfit <- list( color = plotly::toRGB(fit_col[[i]], alpha = fit_alpha), width = fit_lwd ) plt <- plotly::add_trace( plt, x = x[[i]], y = fitted[[i]], type = scatter_type, mode = "lines", line = lfit, name = fitted_text[i], legendgroup = .names[i], showlegend = if (legend & n_groups == 1) TRUE else FALSE, inherit = FALSE ) } } # Layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = font_size, color = theme[["tick_labels_col"]] ) .legend <- list( title = list( text = legend_title, font = list( family = theme[["font_family"]], size = font_size, color = legend_col ) ), x = legend_xy[1], xanchor = legend_xanchor, y = legend_xy[2], yanchor = legend_yanchor, font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation, bgcolor = plotly::toRGB(legend_bg), bordercolor = plotly::toRGB(legend_border_col), borderwidth = legend_borderwidth, tracegroupgap = legend_group_gap ) zerocol <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]]) plt <- plotly::layout( plt, yaxis = list( title = ylab, showline = FALSE, showspikes = y_showspikes, spikecolor = spikecolor, spikedash = spikedash, spikemode = spikemode, spikesnap = spikesnap, spikethickness = spikethickness, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = theme[["zerolines"]], zerolinecolor = zerocol, zerolinewidth = theme[["zerolines_lwd"]], range = ylim, automargin = automargin_y ), xaxis = list( title = list(text = xlab), showline = FALSE, showspikes = x_showspikes, spikecolor = spikecolor, spikedash = spikedash, spikemode = spikemode, spikesnap = spikesnap, spikethickness = spikethickness, # mirror = axes_mirrored, titlefont = f, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = theme[["zerolines"]], zerolinecolor = zerocol, zerolinewidth = theme[["zerolines_lwd"]], range = xlim, automargin = automargin_x ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]], yref = "paper", y = main_y, yanchor = main_yanchor ), # titlefont = list(), paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, showlegend = legend, legend = .legend ) # /layout ## vline ---- if (!is.null(vline)) { plt <- plotly::layout( plt, shapes = plotly_vline( vline, color = vline_col, width = vline_width, dash = vline_dash ) ) } ## hline ---- if (!is.null(hline)) { plt <- plotly::layout( plt, shapes = plotly_hline( hline, color = hline_col, width = hline_width, dash = hline_dash ) ) } ## square ---- if (axes_square) { plt <- plt |> plotly::layout( yaxis = list( scaleanchor = "x", scaleratio = 1 ) ) } # Subtitle ---- # add annotation at top left with same font as main title if (!is.null(subtitle)) { plt <- plt |> plotly::add_annotations( x = subtitle_x, y = subtitle_y, xref = subtitle_xref, yref = subtitle_yref, xanchor = subtitle_xanchor, yanchor = subtitle_yanchor, text = subtitle, showarrow = FALSE, font = list( family = theme[["font_family"]], size = font_size, color = main_col ) ) } # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ), scrollZoom = scrollZoom ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_scatter #' True vs. Predicted Plot #' #' A `draw_scatter` wrapper for plotting true vs. predicted values #' #' @inheritParams draw_scatter #' @param x Numeric, vector/data.frame/list: True values. If y is NULL and #' `NCOL(x) > 1`, first two columns used as `x` and `y`, respectively #' @param y Numeric, vector/data.frame/list: Predicted values #' @param ... Additional arguments passed to [draw_scatter] #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' x <- rnorm(500) #' y <- x + rnorm(500) #' draw_fit(x, y) draw_fit <- function( x, y, xlab = "True", ylab = "Predicted", fit = "glm", se_fit = TRUE, axes_square = TRUE, axes_equal = TRUE, diagonal = TRUE, ... ) { draw_scatter( x, y, xlab = xlab, ylab = ylab, fit = fit, se_fit = se_fit, axes_square = axes_square, axes_equal = axes_equal, diagonal = diagonal, ... ) } # /rtemis::draw_fit ================================================ FILE: R/draw_spectrogram.R ================================================ # draw_spectrogram.R # ::rtemis:: # 2023 EDG rtemis.org # https://plotly.com/r/heatmaps/ #' Interactive Spectrogram #' #' Draw interactive spectrograms using `plotly` #' #' To set custom colors, use a minimum of `lo` and `hi`, optionally also #' `lomid`, `mid`, `midhi` colors and set `colorscale = NULL`. #' #' @param x Numeric: Time. #' @param y Numeric: Frequency. #' @param z Numeric: Power. #' @param colorgrad_n Integer: Number of colors in the gradient. #' @param colors Character: Custom colors for the gradient. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param zlab Character: z-axis label. #' @param hover_xlab Character: x-axis label for hover. #' @param hover_ylab Character: y-axis label for hover. #' @param hover_zlab Character: z-axis label for hover. #' @param zmin Numeric: Minimum value for color scale. #' @param zmax Numeric: Maximum value for color scale. #' @param zauto Logical: If TRUE, automatically set zmin and zmax. #' @param hoverlabel_align Character: Alignment of hover labels. #' @param colorscale Character: Color scale. #' @param colorbar_y Numeric: Y position of colorbar. #' @param colorbar_yanchor Character: Y anchor of colorbar. #' @param colorbar_xpad Numeric: X padding of colorbar. #' @param colorbar_ypad Numeric: Y padding of colorbar. #' @param colorbar_len Numeric: Length of colorbar. #' @param colorbar_title_side Character: Side of colorbar title. #' @param showgrid Logical: If TRUE, show grid. #' @param space Character: Color space for gradient. #' @param lo Character: Low color for gradient. #' @param lomid Character: Low-mid color for gradient. #' @param mid Character: Mid color for gradient. #' @param midhi Character: Mid-high color for gradient. #' @param hi Character: High color for gradient. #' @param grid_gap Integer: Space between cells. #' @param limits Numeric, length 2: Determine color range. Default = NULL, which automatically centers values around 0. #' @param main Character: Main title. #' @param key_title Character: Title of the key. #' @param showticklabels Logical: If TRUE, show tick labels. #' @param theme `Theme` object. #' @param font_size Numeric: Font size. #' @param padding Numeric: Padding between cells. #' @param displayModeBar Logical: If TRUE, display the plotly mode bar. #' @param modeBar_file_format Character: File format for image exports from the mode bar. #' @param filename Character: Filename to save the plot. Default is NULL. #' @param file_width Numeric: Width of exported image. #' @param file_height Numeric: Height of exported image. #' @param file_scale Numeric: Scale of exported image. #' @param ... Additional arguments to be passed to `heatmaply::heatmaply`. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Example data #' time <- seq(0, 10, length.out = 100) #' freq <- seq(1, 100, length.out = 100) #' power <- outer(time, freq, function(t, f) sin(t) * cos(f)) #'draw_spectrogram( #' x = time, #' y = freq, #' z = power #' ) draw_spectrogram <- function( x, y, z, colorgrad_n = 101, colors = NULL, xlab = "Time", ylab = "Frequency", zlab = "Power", hover_xlab = xlab, hover_ylab = ylab, hover_zlab = zlab, zmin = NULL, zmax = NULL, zauto = TRUE, hoverlabel_align = "right", colorscale = "Jet", colorbar_y = .5, colorbar_yanchor = "middle", colorbar_xpad = 0, colorbar_ypad = 0, colorbar_len = .75, colorbar_title_side = "bottom", showgrid = FALSE, space = "rgb", lo = "#18A3AC", lomid = NULL, mid = NULL, midhi = NULL, hi = "#F48024", grid_gap = 0, limits = NULL, main = NULL, key_title = NULL, showticklabels = NULL, theme = choose_theme(getOption("rtemis_theme")), font_size = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) { # Dependencies ---- check_dependencies("plotly") # Tick Labels ---- if (is.null(showticklabels)) { showticklabels <- c( ifelse(NCOL(z) < 50, TRUE, FALSE), ifelse(NROW(z) < 50, TRUE, FALSE) ) } if (is.null(font_size)) { font_size <- 17.0769 - 0.2692 * ncol(z) } # Limits ---- if (is.null(limits)) { maxabs <- max(abs(z), na.rm = TRUE) if (.2 < maxabs && maxabs < 1) { maxabs <- 1 } limits <- c(-maxabs, maxabs) } # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) fg <- plotly::toRGB(theme[["fg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) # tick_col <- plotly::toRGB(theme[["tick_col"]]) tick_labels_col <- plotly::toRGB(theme[["tick_labels_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) # Colors ---- if (is.null(mid)) { mid <- theme[["bg"]] } colors <- colorgrad( n = colorgrad_n, colors = colors, space = space, lo = lo, lomid = lomid, mid = mid, midhi = midhi, hi = hi ) # Plot ---- plt <- plotly::plot_ly() plt <- plt |> plotly::add_trace( x = x, y = y, z = z, type = "heatmap", zauto = zauto, zmin = zmin, zmax = zmax, colorscale = colorscale, colors = colors, hovertemplate = paste0( hover_xlab, ": %{x:.3f}
", hover_ylab, ": %{y:.3f}
", hover_zlab, ": %{z:.3f}" ), showlegend = FALSE ) # Layout ---- # '- layout ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tickfont <- list( family = theme[["font_family"]], size = font_size, color = tick_labels_col ) .legend <- list( font = list( family = theme[["font_family"]], size = font_size, color = fg ) ) plt <- plotly::layout( plt, yaxis = list( title = list( text = ylab, font = f ), titlefont = f, showgrid = showgrid, tickcolor = bg, showline = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickfont = tickfont ), xaxis = list( title = list( text = xlab, font = f ), titlefont = f, showgrid = showgrid, tickcolor = bg, showline = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickfont = tickfont ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg, legend = .legend, hoverlabel = list(align = hoverlabel_align) ) # Manual theme colors ## y axis tick label colors # plt[["x"]][["layoutAttrs"]][[2]][["yaxis2"]][["tickfont"]][["color"]] ## x axis tick label colors # plt[["x"]][["layoutAttrs"]][[2]][["xaxis"]][["tickfont"]][["color"]] <- "rgba(255, 0, 0, 1)" ## edge lines must be invisible plt[["x"]][["layout"]][["yaxis"]][["linecolor"]] <- plt[["x"]][["layout"]][[ "xaxis2" ]][["linecolor"]] <- theme[["bg"]] # Manual layout ---- # Set padding plt[["sizingPolicy"]][["padding"]] <- padding # Colorbar ---- # https://plotly.com/r/reference/#scatter-marker-colorbar plt <- plt |> plotly::colorbar( y = colorbar_y, yanchor = colorbar_yanchor, title = list( text = zlab, font = f, side = colorbar_title_side ), tickfont = tickfont, xpad = colorbar_xpad, ypad = colorbar_ypad, len = colorbar_len ) # Config ---- plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_spectrogram ================================================ FILE: R/draw_survfit.R ================================================ # draw_survfit.R # ::rtemis:: # 2025 EDG rtemis.org # draw_scatter(time, survival_prob, mode = "lines", line_shape = "hv") # ?median lines, error bands, nrisk_table #' Draw a survfit object #' #' Draw a `survfit` object using [draw_scatter]. #' #' @inheritParams draw_scatter #' #' @param x `survfit` object created by [survival::survfit]. #' @param mode Character, vector: "markers", "lines", "markers+lines". # @param plot_median Logical: If `TRUE`, draw line(s) at 50% survival. #' @param xlim Numeric vector of length 2: x-axis limits. #' @param ylim Numeric vector of length 2: y-axis limits. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param main Character: Main title. #' @param symbol Character: Symbol to use for the points. #' @param nrisk_table Logical: If `TRUE`, subplot a table of the number at risk at each time point. #' @param ... Additional arguments passed to [draw_scatter]. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # Get the lung dataset #' data(cancer, package = "survival") #' sf1 <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung) #' draw_survfit(sf1) #' sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung) #' draw_survfit(sf2) #' # with N at risk table #' draw_survfit(sf2) draw_survfit <- function( x, # plot_median = TRUE, mode = "lines", symbol = "cross", line_shape = "hv", xlim = NULL, ylim = NULL, xlab = "Time", ylab = "Survival", main = NULL, legend_xy = c(1, 1), legend_xanchor = "right", legend_yanchor = "top", theme = choose_theme(getOption("rtemis_theme")), nrisk_table = FALSE, filename = NULL, ... ) { # Checks ---- check_inherits(x, "survfit") # Data ---- nstrata <- if (is.null(x[["strata"]])) { 1 } else { length(x[["strata"]]) } if (nstrata > 1) { .group <- unlist(sapply(seq_len(nstrata), function(i) { rep(i, x[["strata"]][i]) })) } else { .group <- rep(1, length(x[["time"]])) } # Limits ---- if (is.null(xlim)) { xlim <- c(0, max(x[["time"]], na.rm = TRUE)) } if (is.null(ylim)) { ylim <- c(0, 1) } # Plot ---- draw_scatter( x = split(x[["time"]], .group), y = split(x[["surv"]], .group), xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, main = main, theme = theme, mode = mode, symbol = symbol, line_shape = line_shape, filename = filename, legend_xy = legend_xy, legend_xanchor = legend_xanchor, legend_yanchor = legend_yanchor, ... ) } # /rtemis::draw_survfit ================================================ FILE: R/draw_table.R ================================================ # draw_table.R # ::rtemis:: # 2019 EDG rtemis.org #' Simple HTML table #' #' Draw an html table using `plotly` #' #' @param x data.frame: Table to draw #' @param .ddSci Logical: If TRUE, apply [ddSci] to numeric columns. #' @param main Character: Table tile. #' @param main_col Color: Title color. #' @param main_x Float \[0, 1\]: Align title: 0: left, .5: center, 1: right. #' @param main_xanchor Character: "auto", "left", "right": plotly's layout xanchor for #' title. #' @param fill_col Color: Used to fill header with column names and first column with #' row names. #' @param table_bg Color: Table background. #' @param bg Color: Background. #' @param line_col Color: Line color. #' @param lwd Float: Line width. #' @param header_font_col Color: Header font color. #' @param table_font_col Color: Table font color. #' @param font_size Integer: Font size. #' @param font_family Character: Font family. #' @param margin List: plotly's margins. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' df <- data.frame( #' Name = c("Alice", "Bob", "Charlie"), #' Age = c(25, 30, 35), #' Score = c(90.5, 85.0, 88.0) #' ) #' p <- draw_table( #' df, #' main = "Sample Table", #' main_col = "#00b2b2" #' ) draw_table <- function( x, .ddSci = TRUE, main = NULL, main_col = "black", main_x = 0, main_xanchor = "auto", fill_col = "#18A3AC", table_bg = "white", bg = "white", line_col = "white", lwd = 1, header_font_col = "white", table_font_col = "gray20", font_size = 14, font_family = "Helvetica Neue", margin = list( l = 0, r = 5, t = 30, b = 0, pad = 0 ) ) { # Dependencies ---- check_dependencies("plotly") # Input ---- x <- as.data.frame(x) if (.ddSci) { # x <- dplyr::mutate_if(x, is.numeric, ddSci) # Lose the dep: x <- data.frame(lapply(x, function(x) if (is.numeric(x)) ddSci(x) else x)) } # Colnames ---- if (!is.null(colnames(x))) { colnames(x) <- paste0("", colnames(x), "") } # Rownames ---- if (!is.null(rownames(x))) { rownames(x) <- paste0("", rownames(x), "") } # plotly ---- plt <- plotly::plot_ly(x) plt <- plotly::add_table( plt, header = list( line = list( width = lwd, color = c( "rgba(255,255,255,0)", plotly::toRGB(line_col) ) ), fill = list( color = c( "rgba(255,255,255,0)", plotly::toRGB(fill_col) ) ), align = c("right", "center"), font = list( color = plotly::toRGB(header_font_col), family = font_family, size = font_size ) ), cells = list( line = list( width = lwd, color = c( plotly::toRGB(line_col), plotly::toRGB(fill_col) ) ), fill = list( color = c( plotly::toRGB(fill_col), plotly::toRGB(table_bg) ) ), align = c("right", "center"), font = list( color = c( plotly::toRGB(header_font_col), plotly::toRGB(table_font_col) ), family = font_family, size = font_size ) ) ) # layout ---- main <- paste0("", main, "") plt <- plotly::layout( plt, title = list( text = main, font = list( family = font_family, size = font_size, color = main_col ), x = main_x, xanchor = main_xanchor ), paper_bgcolor = plotly::toRGB(bg), margin = margin ) plt } # /rtemis::draw_table ================================================ FILE: R/draw_ts.R ================================================ # draw_ts.R # ::rtemis:: # 2022 EDG rtemis.org # => recalc limits for fn = "sum" #' Interactive Timeseries Plots #' #' Draw interactive timeseries plots using `plotly` #' #' @param x Numeric vector of values to plot or list of vectors #' @param time Numeric or Date vector of time corresponding to values of `x` #' @param window Integer: apply `roll_fn` over this many units of time #' @param group Factor defining groups #' @param roll_fn Character: "mean", "median", "max", or "sum": Function to apply on #' rolling windows of `x` #' @param roll_col Color for rolling line #' @param roll_alpha Numeric: transparency for rolling line #' @param roll_lwd Numeric: width of rolling line #' @param roll_name Rolling function name (for annotation) #' @param alpha Numeric \[0, 1\]: Transparency #' @param align Character: "center", "right", or "left" #' @param group_names Character vector of group names #' @param xlab Character: x-axis label #' @param n_xticks Integer: number of x-axis ticks to use (approximately) # @param tickmode #' @param scatter_type Character: "scatter" or "lines" #' @param legend Logical: If TRUE, show legend #' @param x_showspikes Logical: If TRUE, show x-axis spikes on hover #' @param y_showspikes Logical: If TRUE, show y-axis spikes on hover #' @param spikedash Character: dash type string ("solid", "dot", "dash", #' "longdash", "dashdot", or "longdashdot") or a dash length list in px #' (eg "5px,10px,2px,2px") #' @param displayModeBar Logical: If TRUE, display plotly's modebar #' @param theme `Theme` object. #' @param palette Character: palette name, or list of colors #' @param filename Character: Path to filename to save plot #' @param spikemode Character: If "toaxis", spike line is drawn from the data #' point to the axis the series is plotted on. If "across", the line is drawn #' across the entire plot area, and supercedes "toaxis". If "marker", then a #' marker dot is drawn on the axis the series is plotted on #' @param spikesnap Character: "data", "cursor", "hovered data". Determines #' whether spikelines are stuck to the cursor or to the closest datapoints. #' @param spikecolor Color for spike lines #' @param spikethickness Numeric: spike line thickness #' @param modeBar_file_format Character: modeBar image export file format #' @param file_width Numeric: image export width #' @param file_height Numeric: image export height #' @param file_scale Numeric: image export scale #' @param ... Additional arguments to be passed to [draw_scatter] #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' time <- sample(seq(as.Date("2020-03-01"), as.Date("2020-09-23"), length.out = 140)) #' x1 <- rnorm(140) #' x2 <- rnorm(140, 1, 1.2) #' # Single timeseries #' draw_ts(x1, time) #' # Multiple timeseries input as list #' draw_ts(list(Alpha = x1, Beta = x2), time) #' # Multiple timeseries grouped by group, different lengths #' time1 <- sample(seq(as.Date("2020-03-01"), as.Date("2020-07-23"), length.out = 100)) #' time2 <- sample(seq(as.Date("2020-05-01"), as.Date("2020-09-23"), length.out = 140)) #' time <- c(time1, time2) #' x <- c(rnorm(100), rnorm(140, 1, 1.5)) #' group <- c(rep("Alpha", 100), rep("Beta", 140)) #' draw_ts(x, time, 7, group) draw_ts <- function( x, time, window = 7L, group = NULL, roll_fn = c("mean", "median", "max", "none"), roll_col = NULL, roll_alpha = 1, roll_lwd = 2, roll_name = NULL, alpha = NULL, align = "center", group_names = NULL, xlab = "Time", n_xticks = 12, # tickmode = "array", scatter_type = "scatter", legend = TRUE, x_showspikes = TRUE, y_showspikes = FALSE, spikedash = "solid", spikemode = "across", spikesnap = "hovered data", spikecolor = NULL, spikethickness = 1, displayModeBar = TRUE, modeBar_file_format = "svg", theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) { # Arguments ---- roll_fn <- match.arg(roll_fn) if (roll_fn == "none") { window <- NULL } # Timeseries ---- if (!is.null(group)) { x <- split(x, group) time <- split(time, group) } if (is.data.frame(x)) { x <- as.list(x) } if (!is.list(x)) { x <- list(x) } if (is.data.frame(time)) { time <- as.list(time) } if (!is.list(time)) { time <- list(time) } if (is.null(group_names)) { group_names <- if (!is.null(names(x))) { names(x) } else { paste("Group", seq_along(x)) } } idx <- lapply(time, order) time <- lapply(seq_along(time), \(i) time[[i]][idx[[i]]]) if (length(time) < length(x)) { time <- rep(time, length(x) / length(time)) idx <- rep(idx, length(x) / length(idx)) } x <- lapply(seq_along(x), \(i) x[[i]][idx[[i]]]) # xtl <- lapply(seq_along(x), \(i) zoo::zoo(x[[i]], time[[i]])) if (!is.null(window) && window > 0) { avg_line <- switch( roll_fn, mean = lapply( x, \(xt) data.table::frollmean(xt, n = window, align = align) ), median = lapply( x, \(xt) data.table::frollapply(xt, n = window, median, align = align) ), max = lapply( x, \(xt) data.table::frollapply(xt, n = window, max, align = align) ), sum = lapply(x, \(xt) data.table::frollsum(xt, n = window, align = align)) ) } # Palette ---- if (is.character(palette)) { palette <- get_palette(palette) } if (is.null(roll_col)) { roll_col <- palette[seq_along(x)] } # draw_scatter ---- plt <- draw_scatter( time, x, xlab = xlab, theme = theme, palette = palette, alpha = alpha, group_names = group_names, legend = legend, scatter_type = scatter_type, x_showspikes = x_showspikes, y_showspikes = y_showspikes, spikedash = spikedash, spikemode = spikemode, spikesnap = spikesnap, spikecolor = spikecolor, spikethickness = spikethickness, ... ) # Rolling function line ---- if (is.null(roll_name)) { roll_name <- paste0("Rolling ", roll_fn, " (window=", window, ")") } if (!is.null(window)) { for (i in seq_along(x)) { plt <- plt |> plotly::add_trace( x = time[[i]], y = avg_line[[i]], type = "scatter", mode = "lines", line = list( color = plotly::toRGB(roll_col[[i]], alpha = roll_alpha), width = roll_lwd ), name = roll_name ) } } # Config plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_ts ================================================ FILE: R/draw_varimp.R ================================================ # draw_varimp.R # ::rtemis:: # 2017 EDG rtemis.org #' Interactive Variable Importance Plot #' #' Plot variable importance using `plotly` #' #' A simple `plotly` wrapper to plot horizontal barplots, sorted by value, #' which can be used to visualize variable importance, model coefficients, etc. #' #' @param x Numeric vector (or coercible to numeric): Input. #' @param names Vector, string: Names of features. #' @param main Character: Main title. #' @param type Character: "bar" or "line". #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param plot_top Integer: Plot this many top features. #' @param orientation Character: "h" or "v". #' @param line_width Numeric: Line width. #' @param labelify Logical: If TRUE, labelify feature names. #' @param alpha Numeric: Transparency. #' @param palette Character vector: Colors to use. #' @param mar Vector, numeric, length 4: Plot margins in pixels (NOT inches). #' @param font_size Integer: Overall font size to use (essentially for the #' title at this point). #' @param axis_font_size Integer: Font size to use for axis labels and tick labels. #' @param theme `Theme` object. #' @param showlegend Logical: If TRUE, show legend. #' @param filename Character: Path to save the plot image. #' @param file_width Numeric: Width of the saved plot image. #' @param file_height Numeric: Height of the saved plot image. #' @param file_scale Numeric: Scale of the saved plot image. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' # synthetic data #' x <- rnorm(10) #' names(x) <- paste0("Feature_", seq(x)) #' draw_varimp(x) #' draw_varimp(x, orientation = "h") draw_varimp <- function( x, names = NULL, main = NULL, type = c("bar", "line"), xlab = NULL, ylab = NULL, plot_top = 1, # 1 or less means plot this percent orientation = "v", line_width = 12, labelify = TRUE, alpha = 1, palette = get_palette(getOption("rtemis_palette")), mar = NULL, font_size = 16, axis_font_size = 14, theme = choose_theme(getOption("rtemis_theme")), showlegend = TRUE, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) { # Dependencies ---- check_dependencies("plotly") # Arguments ---- type <- match.arg(type) if (is.null(mar)) { mar <- if (is.null(main)) c(20, 20, 20, 20) else c(20, 20, 40, 20) } # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) ## Axis font ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) ## Tick font ---- tickfont <- list( family = theme[["font_family"]], size = font_size, color = theme[["tick_labels_col"]] ) # Data ---- if (NCOL(x) > 1 && NROW(x) > 1) { cli::cli_abort("x must be a vector or single row or column") } ## Names ---- if (is.null(names)) { if (is.null(names(x))) { .names <- if (NCOL(x) == 1) { labelify(rownames(x)) } else { labelify(colnames(x)) } } else { .names <- labelify(names(x)) } } else { .names <- labelify(names) } x <- as.numeric(x) if (length(.names) == 0) { .names <- paste("Feature", seq_along(x)) } ## Index ---- index <- if (plot_top <= 1) { order(abs(x))[(length(x) - plot_top * length(x)):length(x)] } else { if (plot_top > length(x)) { plot_top <- length(x) } order(abs(x))[(length(x) - plot_top + 1):length(x)] } x <- x[index] .names <- .names[index] # reorder to arrange negative to positive index <- order(x) x <- x[index] .names <- .names[index] y <- factor(.names, levels = .names) # Colors ---- col <- palette[[1]] col <- color_adjust(col, alpha = alpha) # plotly ---- if (type == "bar") { plt <- plotly::plot_ly( x = if (orientation == "h") x else y, y = if (orientation == "h") y else x, type = "bar", marker = list( color = col, line = list(width = NULL) ), showlegend = FALSE ) } else { # Plot each x[i] value as a line segment from 0 to x[i] plt <- plotly::plot_ly() for (i in seq_along(x)) { plt <- plotly::add_trace( plt, x = if (orientation == "h") c(0, x[i]) else c(y[i], y[i]), y = if (orientation == "h") c(y[i], y[i]) else c(0, x[i]), type = "scatter", mode = "lines", line = list(color = col, width = line_width), name = .names[i], showlegend = FALSE, # Show "_name[i]: value" on hover hoverinfo = "text", hovertext = paste0(.names[i], ": ", ddSci(x[i])) ) } } # Layout ---- if (is.null(xlab)) { xlab <- if (orientation == "h") "Variable Importance" else "" } if (is.null(ylab)) { ylab <- if (orientation == "h") "" else "Variable Importance" } plt <- plotly::layout( plt, margin = list( b = mar[1], l = mar[2], t = mar[3], r = mar[4], pad = 0 ), # inner plot area padding xaxis = list( title = list( text = xlab, font = f ), # showline = axes_visible, # mirror = axes_mirrored, showgrid = FALSE, gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE ), yaxis = list( title = list( text = ylab, font = f ), # showline = axes_visible, # mirror = axes_mirrored, showgrid = theme[["grid"]], # gridcolor = grid_col, # gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tickfont, zeroline = FALSE ), title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]] ), paper_bgcolor = bg, plot_bgcolor = plot_bg ) # Remove padding plt[["sizingPolicy"]][["padding"]] <- 0 # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # draw_varimp ================================================ FILE: R/draw_volcano.R ================================================ # draw_volcano # ::rtemis:: # 2022 EDG rtemis.org # allow custom grouping # References # https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js #' Volcano Plot #' #' @param x Numeric vector: Input values, e.g. log2 fold change, coefficients, etc. #' @param pvals Numeric vector: p-values. #' @param xnames Character vector: `x` names. #' @param group Optional factor: Used to color code points. If NULL, significant points #' below `x_thresh`, non-significant points, and significant points #' above `x_thresh` will be plotted with the first, second and third #' color of `palette`. #' @param x_thresh Numeric x-axis threshold separating low from high. #' @param p_thresh Numeric: p-value threshold of significance. #' @param p_adjust_method Character: p-value adjustment method. #' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none". #' Default = "holm". Use "none" for raw p-values. #' @param p_transform function. #' @param legend Logical: If TRUE, show legend. Will default to FALSE, if #' `group = NULL`, otherwise to TRUE. #' @param legend_lo Character: Legend to annotate significant points below the #' `x_thresh`. #' @param legend_hi Character: Legend to annotate significant points above the #' `x_thresh`. #' @param label_lo Character: label for low values. #' @param label_hi Character: label for high values. #' @param main Character: Main title. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param margin Named list of plot margins. #' Default = `list(b = 65, l = 65, t = 50, r = 10, pad = 0)`. #' @param xlim Numeric vector, length 2: x-axis limits. #' @param ylim Numeric vector, length 2: y-axis limits. #' @param alpha Numeric: point transparency. #' @param hline Numeric: If defined, draw a horizontal line at this y value. #' @param hline_col Color for `hline`. #' @param hline_width Numeric: Width for `hline`. #' @param hline_dash Character: Type of line to draw: "solid", "dot", "dash", #' "longdash", "dashdot", or "longdashdot". #' @param hline_annotate Character: Text of horizontal line annotation if #' `hline` is set. #' @param hline_annotation_x Numeric: x position to place annotation with paper #' as reference. 0: to the left of the plot area; 1: to the right of the plot area. #' @param annotate Logical: If TRUE, annotate significant points. #' @param annotate_col Color for annotations. #' @param theme `Theme` object. #' @param font_size Integer: Font size. #' @param palette Character vector: Colors to use. If `group` is NULL, the first, second and third #' colors will be used for significant points with negative coefficients, non-significant points, and #' significant points with positive coefficients, respectively. If `group` is not NULL, colors will #' be assigned to groups, in order of appearance. #' @param legend_x_lo Numeric: x position of `legend_lo`. #' @param legend_x_hi Numeric: x position of `legend_hi`. #' @param legend_y Numeric: y position for `legend_lo` and `legend_hi`. #' @param annotate_n Integer: Number of significant points to annotate. #' @param ax_lo Numeric: Sets the x component of the arrow tail about the arrow head for #' significant points below `x_thresh`. #' @param ay_lo Numeric: Sets the y component of the arrow tail about the arrow head for #' significant points below `x_thresh`. #' @param ax_hi Numeric: Sets the x component of the arrow tail about the arrow head for #' significant points above `x_thresh`. #' @param ay_hi Numeric: Sets the y component of the arrow tail about the arrow head for #' significant points above `x_thresh`. #' @param annotate_alpha Numeric: Transparency for annotations. #' @param hovertext Character vector: Text to display on hover. #' @param displayModeBar Logical: If TRUE, display plotly mode bar. #' @param filename Character: Path to save the plot image. #' @param file_width Numeric: Width of the saved plot image. #' @param file_height Numeric: Height of the saved plot image. #' @param file_scale Numeric: Scale of the saved plot image. #' @param verbosity Integer: Verbosity level. #' @param ... Additional arguments passed to [draw_scatter]. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' set.seed(2019) #' y <- rnormmat(500, 500, return_df = TRUE) #' x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500)) #' mod <- massGLM(x, y) #' draw_volcano(summary(mod)[["Coefficient_x"]], summary(mod)[["p_value_x"]]) draw_volcano <- function( x, pvals, xnames = NULL, group = NULL, x_thresh = 0, p_thresh = .05, p_adjust_method = c( "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" ), p_transform = function(x) -log10(x), legend = NULL, legend_lo = NULL, legend_hi = NULL, label_lo = "Low", label_hi = "High", main = NULL, xlab = NULL, ylab = NULL, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), xlim = NULL, ylim = NULL, alpha = NULL, hline = NULL, hline_col = NULL, hline_width = 1, hline_dash = "solid", hline_annotate = NULL, hline_annotation_x = 1, theme = choose_theme(getOption("rtemis_theme")), annotate = TRUE, annotate_col = theme[["labs_col"]], font_size = 16, palette = NULL, legend_x_lo = NULL, legend_x_hi = NULL, legend_y = .97, annotate_n = 7L, ax_lo = NULL, # 40, ay_lo = NULL, ax_hi = NULL, # -40, ay_hi = NULL, annotate_alpha = .7, hovertext = NULL, displayModeBar = "hover", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 1L, ... ) { xname <- deparse(substitute(x)) p_adjust_method <- match.arg(p_adjust_method) filt <- !is.na(x) & !is.na(pvals) if (is.null(xnames)) { xnames <- names(x) } else { xnames <- xnames } xnames <- xnames[filt] if (!is.null(group)) { group <- group[filt] } x <- x[filt] pvals <- pvals[filt] if (is.null(xnames)) { xnames <- paste("Feature", seq_along(x)) } if (is.null(legend)) { legend <- !is.null(group) } p_adjusted <- p.adjust(pvals, method = p_adjust_method) index_ltpthresh <- p_adjusted < p_thresh p_transformed <- p_transform(p_adjusted) if (is.null(xlab)) { xlab <- labelify(xname) } # Default to lo - ns - hi groups if (is.null(group)) { group <- rep("NS", length(pvals)) group[index_ltpthresh & x < x_thresh] <- label_lo group[index_ltpthresh & x > x_thresh] <- label_hi group <- factor(group, levels = c(label_lo, "NS", label_hi)) if (is.null(palette)) { palette <- list("#43A4AC", "#7f7f7f", "#FA9860") } } group.counts <- table(group) include <- group.counts > 0 if (verbosity > 0L) { cat("Group counts:\n") print(group.counts) } # Colors for groups if (is.null(palette)) { palette <- get_palette(getOption("rtemis_palette", "rtms")) } # Theme ---- check_is_S7(theme, Theme) # y-axis label ---- if (is.null(ylab)) { ylab <- fn2label(p_transform, "p-value") if (p_adjust_method != "none") { ylab <- paste0(ylab, " (", p_adjust_method, "-corrected)") } } # Plot ---- if (is.null(hovertext)) { hovertext <- xnames } plt <- draw_scatter( x, p_transformed, main = main, xlab = xlab, ylab = ylab, xlim = xlim, ylim = ylim, alpha = alpha, theme = theme, margin = margin, legend = legend, group = group, palette = palette[include], hovertext = hovertext, ... ) # High - Low legend ---- autolegend_x_lo <- is.null(legend_x_lo) if (autolegend_x_lo) { # legend.x.lo <- Filter(\(x) x < x.thresh, x) |> range() |> diff() * -.2 + x.thresh legend_x_lo <- x_thresh - abs(diff(c(x_thresh, min(x, na.rm = TRUE)))) * .2 } autolegend_x_hi <- is.null(legend_x_hi) if (autolegend_x_hi) { # legend.x.hi <- Filter(\(x) x > x.thresh, x) |> range() |> diff() * .2 + x.thresh legend_x_hi <- x_thresh + abs(diff(c(x_thresh, max(x, na.rm = TRUE)))) * .2 } legxdiff <- legend_x_hi - legend_x_lo if (autolegend_x_lo) { legend_x_lo <- x_thresh - legxdiff / 2 } if (autolegend_x_hi) { legend_x_hi <- x_thresh + legxdiff / 2 } if (group.counts[1] > 0 && !is.null(legend_lo)) { plt <- plt |> plotly::add_annotations( x = legend_x_lo, y = legend_y, text = legend_lo, xref = "x", yref = "paper", showarrow = FALSE, font = list( color = palette[[1]], family = theme[["font_family"]], size = font_size ) ) } if (group.counts[3] > 0 && !is.null(legend_hi)) { plt <- plt |> plotly::add_annotations( x = legend_x_hi, y = legend_y, text = legend_hi, xref = "x", yref = "paper", showarrow = FALSE, font = list( color = palette[[3]], family = theme[["font_family"]], size = font_size ) ) } # Annotations ---- if (annotate) { index_ltxthresh <- x < x_thresh index_gtxthresh <- x > x_thresh index_lo <- index_ltpthresh & index_ltxthresh index_hi <- index_ltpthresh & index_gtxthresh annotate_n_lo <- annotate_n_hi <- annotate_n if (sum(index_lo) < annotate_n) { annotate_n_lo <- sum(index_lo) } if (sum(index_hi) < annotate_n) { annotate_n_hi <- sum(index_hi) } if (annotate_n_lo > 0) { lo_ord <- order(pvals[index_lo]) lo_x <- x[index_lo][lo_ord[seq_len(annotate_n_lo)]] lo_pval <- p_transformed[index_lo][lo_ord[seq_len(annotate_n_lo)]] lo_name <- xnames[index_lo][lo_ord[seq_len(annotate_n_lo)]] if (is.null(ay_lo)) { if (is.null(ay_lo)) { ay_lo <- drange(order(lo_pval), 30, -30) } } if (is.null(ax_lo)) { ax_lo <- 5 + 5 * annotate_n_lo } plt <- plt |> plotly::add_annotations( x = lo_x, y = lo_pval, text = lo_name, arrowhead = 4, arrowcolor = adjustcolor(theme[["fg"]], .33), arrowsize = .5, arrowwidth = 1, ax = ax_lo, ay = ay_lo, xanchor = "left", font = list( size = 16, family = theme[["font_family"]], color = adjustcolor(theme[["fg"]], annotate_alpha) ) ) } # Annotate 10 most significant increasing if (annotate_n_hi > 0) { hi_ord <- order(pvals[index_ltpthresh & index_gtxthresh]) hi_x <- x[index_ltpthresh & index_gtxthresh][hi_ord[seq_len( annotate_n_hi )]] hi_pval <- p_transformed[ index_ltpthresh & index_gtxthresh ][hi_ord[seq_len(annotate_n_hi)]] hi_name <- xnames[index_ltpthresh & index_gtxthresh][hi_ord[seq_len( annotate_n_hi )]] if (is.null(ay_hi)) { ay_hi <- drange(order(hi_pval), 50, -50) } if (is.null(ax_hi)) { ax_hi <- -5 - 5 * annotate_n_hi } plt <- plt |> plotly::add_annotations( x = hi_x, y = hi_pval, text = hi_name, arrowhead = 4, arrowcolor = adjustcolor(theme[["fg"]], .33), arrowsize = .5, arrowwidth = 1, ax = ax_hi, ay = ay_hi, xanchor = "right", font = list( size = 16, family = theme[["font_family"]], color = adjustcolor(theme[["fg"]], annotate_alpha) ) ) } } # hline ---- if (!is.null(hline)) { if (is.null(hline_col)) { hline_col <- theme[["fg"]] } hline_col <- recycle(hline_col, hline) hline_width <- recycle(hline_width, hline) hline_dash <- recycle(hline_dash, hline) hlinel <- lapply(seq_along(hline), function(i) { list( type = "line", x0 = 0, x1 = 1, xref = "paper", y0 = hline[i], y1 = hline[i], line = list( color = hline_col[i], width = hline_width[i], dash = hline_dash[i] ) ) }) plt <- plotly::layout(plt, shapes = hlinel) # Annotate horizontal lines on the right border of the plot if (!is.null(hline_annotate)) { plt <- plt |> plotly::add_annotations( xref = "paper", yref = "y", xanchor = "right", yanchor = "bottom", x = hline_annotation_x, y = hline, text = hline_annotate, font = list( family = theme[["font_family"]], size = font_size, color = annotate_col ), showarrow = FALSE ) } } # Config ---- plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = "svg", width = file_width, height = file_height, scale = file_scale ) ) # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly plt } # /rtemis::draw_volcano ================================================ FILE: R/draw_xt.R ================================================ # draw_xt.R # ::rtemis:: # 2024 EDG rtemis.org # Multiple legends # https://plotly.com/python/legend/#adding-multiple-legends # https://plotly.com/r/legend/ #' Plot timeseries data #' #' @param x Datetime vector or list of vectors. #' @param y Numeric vector or named list of vectors: y-axis data. #' @param x2 Datetime vector or list of vectors, optional: must be provided if `y2` does not #' correspond to values in `x`. A single x-axis will be drawn for all values in `x` and `x2`. #' @param y2 Numeric vector, optional: If provided, a second y-axis will be added to the right #' side of the plot. #' @param which_xy Integer vector: Indices of `x` and `y` to plot. #' If not provided, will select up to the first two x-y traces. #' @param which_xy2 Integer vector: Indices of `x2` and `y2` to plot. #' If not provided, will select up to the first two x2-y2 traces. #' @param shade_bin Integer vector \{0, 1\}: Time points in `x` to shade on the plot. For example, #' if there are 10 time points in `x`, and you want to shade time points 3 to 7, #' `shade_bin = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 0)`. Only set `shade_bin` or `shade_interval`, not #' both. #' @param shade_interval List of numeric vectors: Intervals to shade on the plot. Only set #' `shade_bin` or `shade_interval`, not both. #' @param shade_col Color: Color to shade intervals. #' @param shade_x Numeric vector: x-values to use for shading. #' @param shade_name Character: Name for shaded intervals. #' @param shade_showlegend Logical: If TRUE, show legend for shaded intervals. #' @param ynames Character vector, optional: Names for each vector in `y`. #' @param y2names Character vector, optional: Names for each vector in `y2`. #' @param xlab Character: x-axis label. #' @param ylab Character: y-axis label. #' @param y2lab Character: y2-axis label. #' @param xunits Character: x-axis units. #' @param yunits Character: y-axis units. #' @param y2units Character: y2-axis units. #' @param yunits_col Color for y-axis units. #' @param y2units_col Color for y2-axis units. #' @param zt Numeric vector: Zeitgeber time. If provided, will be shown on the x-axis instead of #' `x`. To be used only with a single `x` vector and no `x2`. #' @param show_zt Logical: If TRUE, show zt on x-axis, if zt is provided. #' @param show_zt_every Optional integer: Show zt every `show_zt_every` ticks. If NULL, will be #' calculated to be `x_nticks` +/- 1 if `x_nticks` is not 0, otherwise 12 +/- 1. #' @param zt_nticks Integer: Number of zt ticks to show. Only used if `show_zt_every` is NULL. #' The actual number of ticks shown will depend on the periodicity of zt, so that zt = 0 is always #' included. #' @param main Character: Main title. #' @param main_y Numeric: Y position of main title. #' @param main_yanchor Character: "top", "middle", "bottom". #' @param x_nticks Integer: Number of ticks on x-axis. #' @param y_nticks Integer: Number of ticks on y-axis. #' @param show_rangeslider Logical: If TRUE, show a range slider. #' @param slider_start Numeric: Start of range slider. #' @param slider_end Numeric: End of range slider. #' @param theme `Theme` object. #' @param palette Character vector: Colors to be used to draw each vector in `y` and `y2`, in order. #' @param font_size Numeric: Font size for text. #' @param yfill Character: Fill type for y-axis: "none", "tozeroy", "tonexty". #' @param y2fill Character: Fill type for y2-axis: "none", "tozeroy", "tonexty". #' @param fill_alpha Numeric: Fill opacity for y-axis. #' @param yline_width Numeric: Line width for y-axis lines. #' @param y2line_width Numeric: Line width for y2-axis lines. #' @param x_showspikes Logical: If TRUE, show spikes on x-axis. #' @param spike_dash Character: Dash type for spikes: "solid", "dot", "dash", "longdash", #' "dashdot", "longdashdot". #' @param spike_col Color for spikes. #' @param x_spike_thickness Numeric: Thickness of spikes. `-2` avoids drawing border around spikes. #' @param tickfont_size Numeric: Font size for tick labels. #' @param x_tickmode Character: "auto", "linear", "array". #' @param x_tickvals Numeric vector: Tick positions. #' @param x_ticktext Character vector: Tick labels. #' @param x_tickangle Numeric: Angle of tick labels. #' @param legend_x Numeric: X position of legend. #' @param legend_y Numeric: Y position of legend. #' @param legend_xanchor Character: "left", "center", "right". #' @param legend_yanchor Character: "top", "middle", "bottom". #' @param legend_orientation Character: "v" for vertical, "h" for horizontal. #' @param margin Named list with 4 numeric values: "l", "r", "t", "b" for left, right, top, bottom #' margins. #' @param x_standoff Numeric: Distance from x-axis to x-axis label. #' @param y_standoff Numeric: Distance from y-axis to y-axis label. #' @param y2_standoff Numeric: Distance from y2-axis to y2-axis label. #' @param hovermode Character: "closest", "x", "x unified". #' @param displayModeBar Logical: If TRUE, display plotly mode bar. #' @param modeBar_file_format Character: "png", "svg", "jpeg", "webp", "pdf": file format for mode #' bar image export. #' @param scrollZoom Logical: If TRUE, enable zooming by scrolling. #' @param filename Character: Path to save the plot image. #' @param file_width Numeric: Width of the saved plot image. #' @param file_height Numeric: Height of the saved plot image. #' @param file_scale Numeric: Scale of the saved plot image. #' #' @return `plotly` object. #' #' @author EDG #' @export #' #' @examplesIf interactive() #' datetime <- seq( #' as.POSIXct("2020-01-01 00:00"), #' as.POSIXct("2020-01-02 00:00"), #' by = "hour" #' ) #' df <- data.frame( #' datetime = datetime, #' value1 = rnorm(length(datetime)), #' value2 = rnorm(length(datetime)) #' ) #' draw_xt(df, x = df[, 1], y = df[, 2:3]) draw_xt <- function( x, y, x2 = NULL, y2 = NULL, which_xy = NULL, which_xy2 = NULL, # Shade intervals shade_bin = NULL, shade_interval = NULL, shade_col = NULL, shade_x = NULL, shade_name = "", shade_showlegend = FALSE, ynames = NULL, y2names = NULL, xlab = NULL, ylab = NULL, y2lab = NULL, xunits = NULL, yunits = NULL, y2units = NULL, yunits_col = NULL, y2units_col = NULL, zt = NULL, show_zt = TRUE, show_zt_every = NULL, zt_nticks = 18L, main = NULL, main_y = 1, main_yanchor = "bottom", x_nticks = 0, y_nticks = 0, show_rangeslider = NULL, slider_start = NULL, slider_end = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), font_size = 16, yfill = "none", y2fill = "none", fill_alpha = .2, yline_width = 2, y2line_width = 2, x_showspikes = TRUE, spike_dash = "solid", spike_col = NULL, x_spike_thickness = -2, tickfont_size = 16, x_tickmode = "auto", x_tickvals = NULL, x_ticktext = NULL, x_tickangle = NULL, # legend legend_x = 0, legend_y = 1.1, legend_xanchor = "left", legend_yanchor = "top", legend_orientation = "h", margin = list(l = 75, r = 75, b = 75, t = 75), # axis labels x_standoff = 20L, y_standoff = 20L, y2_standoff = 20L, hovermode = "x", # config displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, filename = NULL, file_width = 960, file_height = 500, file_scale = 1 ) { # Names ---- .xname <- labelify(gsub(".*\\$", "", deparse(substitute(x)))) .x2name <- labelify(gsub(".*\\$", "", deparse(substitute(x2)))) if (!is.null(x2) && .xname != .x2name) { .xname <- NULL } .yname <- labelify(gsub(".*\\$", "", deparse(substitute(y)))) .y2name <- labelify(gsub(".*\\$", "", deparse(substitute(y2)))) # Data ---- # Data to lists if (!is.null(y2) && is.null(x2)) { x2 <- x } if (!is.list(x)) { x <- list(x) } if (!is.list(y)) { y <- list(y) } if (!is.null(y2) && !is.list(y2)) { y2 <- list(y2) } if (!is.null(y2) && !is.list(x2)) { x2 <- list(x2) } # Recycle x and x2 as needed if (length(y) > 1 && length(x) == 1) { x <- rep(x, length(y)) } if (!is.null(y2) && length(y2) > 1 && length(x2) == 1) { x2 <- rep(x2, length(y2)) } if (length(x) != length(y)) { cli::cli_abort("{.arg x} and {.arg y} must be the same length") } if (!is.null(y2) && length(x2) != length(y2)) { cli::cli_abort("{.arg x2} and {.arg y2} must be the same length") } # Which traces to plot ---- # By default, plot up to two for each y axis if (is.null(which_xy)) { if (length(x) > 2) { x <- x[1:2] y <- y[1:2] } } else { x <- x[which_xy] y <- y[which_xy] } if (is.null(which_xy2)) { if (length(x2) > 2) { x2 <- x2[1:2] y2 <- y2[1:2] } } else { x2 <- x2[which_xy2] y2 <- y2[which_xy2] } # Rangeslider ---- if (is.null(show_rangeslider)) { show_rangeslider <- length(x[[1]]) > 500 } # Check args ---- if (!is.null(shade_bin) && !is.null(shade_interval)) { cli::cli_abort( "Only set {.arg shade_bin} or {.arg shade_interval}, not both" ) } # Names ---- if (is.null(ynames)) { ynames <- if (is.null(names(y))) { if (length(y) > 1) { paste(.yname, seq_along(y), sep = "_") } else { .yname } } else { names(y) } } if (!is.null(y2) && is.null(y2names)) { y2names <- if (is.null(names(y2))) { if (length(y2) > 1) { paste(.y2name, seq_along(y2), sep = "_") } else { .y2name } } else { names(y2) } } # Add units if (!is.null(yunits)) { if (is.null(yunits_col)) { yunits_col <- if (length(y) == 1) { palette[[1]] } else { "#00ff00" } } yunits <- paste0( "(", '', yunits, "", ")" ) ynames <- paste(ynames, yunits) } if (!is.null(y2units)) { if (is.null(y2units_col)) { y2units_col <- if (length(y2) == 1) { palette[[length(x) + 1]] } else { "#ff0000" } } y2units <- paste0( "(", '', y2units, "", ")" ) y2names <- paste(y2names, y2units) } # Theme ---- check_is_S7(theme, Theme) bg <- plotly::toRGB(theme[["bg"]]) plot_bg <- plotly::toRGB(theme[["plot_bg"]]) grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]]) tick_col <- plotly::toRGB(theme[["tick_col"]]) legend_col <- labs_col <- plotly::toRGB(theme[["labs_col"]]) main_col <- plotly::toRGB(theme[["main_col"]]) if (!theme[["axes_visible"]]) { tick_col <- labs_col <- "transparent" } if (is.null(spike_col)) { spike_col <- theme[["fg"]] } zero_col <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]]) # Colors ---- # if (is.null(line1.fill.col)) line1.fill.col <- plotly::toRGB(line1.col, alpha = 0.4) # if (is.null(line2.fill.col) && !is.null(y2)) { # line2.fill.col <- plotly::toRGB(line2.col, alpha = 0.4) # } palette_y <- palette[seq_along(y)] palette_y2 <- palette[length(y) + seq_along(y2)] if (length(y) > 1 && length(yfill) == 1) { yfill <- rep(yfill, length(y)) } stopifnot(length(yfill) == length(y)) if (length(y2) > 1 && length(y2fill) == 1) { y2fill <- rep(y2fill, length(y2)) } if (!is.null(y2)) { stopifnot(length(y2fill) == length(y2)) } # Fonts ---- f <- list( family = theme[["font_family"]], size = font_size, color = labs_col ) tick_font <- list( family = theme[["font_family"]], size = tickfont_size, color = theme[["tick_labels_col"]] ) # Calculate shade_interval from shade_bin ---- if (!is.null(shade_bin)) { shade_bin_p <- c(0, shade_bin, 0) shade_bin_starts <- which(diff(shade_bin_p) == 1) shade_bin_ends <- which(diff(shade_bin_p) == -1) shade_interval <- lapply( seq_along(shade_bin_starts), \(i) c(shade_bin_starts[i], shade_bin_ends[i]) ) } # zt ---- if (show_zt && !is.null(zt)) { x_tickmode <- "array" if (is.null(show_zt_every)) { # Get periodicity of ZT idi0 <- which(zt == 0) # Get differences between 0s diff_idi0 <- diff(idi0)[1] # Pick show.zt.every to be perfect divisor of diff_idi0 so that total length is closest to zt.nticks # a) diff_idi0 %% show.zt.every must be 0 # b) length(zt) / show.zt.every must be closest to zt.nticks sze <- round(length(zt) / zt_nticks) i <- 0 # if diff_idi0 %% sze != 0, search for closest integer above or below sze sze_high <- sze_low <- sze while (diff_idi0 %% sze_low != 0) { sze_low <- sze_low - 1 } while (diff_idi0 %% sze_high != 0) { sze_high <- sze_high + 1 } show_zt_every <- c(sze_low, sze_high)[which.min(abs(c( sze - sze_low, sze - sze_high )))] } idi <- seq(1, length(zt), by = show_zt_every) # Make sure 0 is included while (!0 %in% zt[idi]) { idi <- idi + 1 } idi <- idi[idi <= length(zt)] x_tickvals <- x[[1]][idi] x_ticktext <- zt[idi] if (is.null(xlab)) xlab <- "ZT" } # Plot ---- plt <- plotly::plot_ly(type = "scatter", mode = "lines") # Shade intervals ---- if (!is.null(shade_interval)) { if (is.null(shade_x)) { shade_x <- x[[1]] } if (is.null(shade_col)) { shade_col <- plotly::toRGB(theme[["fg"]], 0.15) } ymax <- max(unlist(y), unlist(y2)) # Draw shaded rectangles for (i in seq_along(shade_interval)) { plt <- plotly::add_trace( plt, x = c( shade_x[shade_interval[[i]][1]], shade_x[shade_interval[[i]][2]], shade_x[shade_interval[[i]][2]], shade_x[shade_interval[[i]][1]] ), y = c(0, 0, ymax, ymax), fill = "toself", fillcolor = shade_col, line = list(color = "transparent"), yaxis = "y", xaxis = "x", name = shade_name, legendgroup = if (shade_showlegend) shade_name else NULL, showlegend = shade_showlegend && i == 1 ) } } # /shade.interval for (i in seq_along(y)) { plt <- plotly::add_trace( plt, x = x[[i]], y = y[[i]], line = list(color = palette_y[[i]], width = yline_width), fill = yfill[[i]], fillcolor = plotly::toRGB(palette_y[[i]], alpha = fill_alpha), name = ynames[[i]], legendgroup = if (!is.null(y2)) "legend_y" else NULL ) } # /y scatter if (!is.null(y2)) { for (i in seq_along(y2)) { plt <- plotly::add_trace( plt, x = x2[[i]], y = y2[[i]], line = list(color = palette_y2[[i]], width = y2line_width), fill = y2fill[[i]], fillcolor = plotly::toRGB(palette_y2[[i]], alpha = fill_alpha), name = y2names[[i]], legendgroup = "legend_y2", yaxis = "y2" ) } } # /y2 scatter # Labels ---- if (is.null(xlab)) { xlab <- .xname } if (!is.null(xunits)) { xlab <- paste0(xlab, " (", xunits, ")") } if (!is.null(yunits)) { ylab <- if (is.null(ylab)) { if (length(y) == 1) { ynames } else { yunits } } else { paste(ylab, yunits) } } if (!is.null(y2units)) { y2lab <- if (is.null(y2lab)) { if (length(y2) == 1) { y2names } else { y2units } } else { paste(y2lab, y2units) } } # Layout ---- plt <- plotly::layout( plt, xaxis = list( title = list( text = xlab, standoff = x_standoff, font = f ), nticks = x_nticks, showspikes = x_showspikes, spikedash = spike_dash, spikecolor = spike_col, spikethickness = x_spike_thickness, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickmode = x_tickmode, tickvals = x_tickvals, ticktext = x_ticktext, tickangle = x_tickangle, tickcolor = tick_col, tickfont = tick_font, zeroline = theme[["zerolines"]], zerolinecolor = zero_col, zerolinewidth = theme[["zerolines_lwd"]] ), # /layout > xaxis yaxis = list( title = list( text = ylab, standoff = y_standoff, font = f ), nticks = y_nticks, showgrid = theme[["grid"]], gridcolor = grid_col, gridwidth = theme[["grid_lwd"]], tickcolor = tick_col, tickfont = tick_font, zeroline = theme[["zerolines"]], zerolinecolor = zero_col, zerolinewidth = theme[["zerolines_lwd"]], standoff = y_standoff ), # /layout > yaxis title = list( text = main, font = list( family = theme[["font_family"]], size = font_size, color = main_col ), xref = "paper", x = theme[["main_adj"]], yref = "paper", y = main_y, yanchor = main_yanchor ), legend = list( x = legend_x, y = legend_y, xanchor = legend_xanchor, yanchor = legend_yanchor, font = list( family = theme[["font_family"]], size = font_size, color = legend_col ), orientation = legend_orientation, bgcolor = "#ffffff00" ), # /layout > legend paper_bgcolor = bg, plot_bgcolor = plot_bg, margin = margin, hovermode = hovermode ) # /layout if (!is.null(y2)) { plt <- plt |> plotly::layout( yaxis2 = list( overlaying = "y", side = "right", title = list( text = y2lab, standoff = y2_standoff, font = f ), tickfont = tick_font ) ) } # /yaxis2 layout # Config ---- plt <- plotly::config( plt, displaylogo = FALSE, displayModeBar = displayModeBar, toImageButtonOptions = list( format = modeBar_file_format, width = file_width, height = file_height ), scrollZoom = scrollZoom ) # /config # Write to file ---- if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } # /export_plotly # Rangeslider ---- if (show_rangeslider) { if (is.null(slider_start)) { slider_start <- x[[1]][1] } if (is.null(slider_end)) { idi <- min(500, length(x[[1]])) slider_end <- x[[1]][idi] } plt <- plt |> plotly::rangeslider(start = slider_start, end = slider_end) } # /rangeslider return(plt) } # /rtemis::draw_xt # tickmode = "array", tickvals: placement, ticktext: labels ================================================ FILE: R/fmt.R ================================================ # fmt.R # ::rtemis:: # 2025 EDG rtemis.org # %% fmt ---- #' Text formatting #' #' Formats text with specified color, styles, and background using ANSI escape codes or HTML, with support for plain text output. #' #' @param x Character: Text to format. #' @param col Optional Character: Color using hex code or name. If NULL, no color is applied. #' @param bold Logical: If TRUE, make text bold. #' @param italic Logical: If TRUE, make text italic. #' @param underline Logical: If TRUE, underline text. #' @param thin Logical: If TRUE, make text thin/light. #' @param muted Logical: If TRUE, make text muted/dimmed. #' @param bg Optional Character: Background color using hex code or name. If NULL, no background #' color is applied. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with specified styling. #' #' @details #' This function combines multiple formatting options into a single call, #' making it more efficient than nested function calls. It generates #' optimized ANSI escape sequences and clean HTML output. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' # Simple color #' fmt("Hello", col = "red") #' #' # Bold red text #' fmt("Error", col = "red", bold = TRUE) #' #' # Multiple styles #' fmt("Warning", col = "yellow", bold = TRUE, italic = TRUE) #' #' # With background #' fmt("Highlight", col = "white", bg = "blue", bold = TRUE) fmt <- function( x, col = NULL, bold = FALSE, italic = FALSE, underline = FALSE, thin = FALSE, muted = FALSE, bg = NULL, pad = 0L, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) out <- switch( output_type, "ansi" = { codes <- character() # Style codes if (bold) { codes <- c(codes, "1") } else { # Explicitly set normal weight to override message() bold default codes <- c(codes, "22") } if (thin || muted) { codes <- c(codes, "2") } # Both use dim/faint if (italic) { codes <- c(codes, "3") } if (underline) { codes <- c(codes, "4") } # Foreground color if (!is.null(col)) { tryCatch( { col_rgb <- col2rgb(col) codes <- c( codes, paste0("38;2;", col_rgb[1], ";", col_rgb[2], ";", col_rgb[3]) ) }, error = function(e) { warning("Invalid color '", col, "', ignoring color") } ) } # Background color if (!is.null(bg)) { tryCatch( { bg_rgb <- col2rgb(bg) codes <- c( codes, paste0("48;2;", bg_rgb[1], ";", bg_rgb[2], ";", bg_rgb[3]) ) }, error = function(e) { warning("Invalid background color '", bg, "', ignoring background") } ) } # Generate ANSI sequence if (length(codes) > 0) { paste0("\033[", paste(codes, collapse = ";"), "m", x, "\033[0m") } else { x } }, "html" = { styles <- character() # Colors if (!is.null(col)) { styles <- c(styles, paste0("color: ", col)) } if (!is.null(bg)) { styles <- c(styles, paste0("background-color: ", bg)) } # Styles if (bold) { styles <- c(styles, "font-weight: bold") } if (thin) { styles <- c(styles, "font-weight: lighter") } if (muted) { styles <- c(styles, "color: gray") } # Override color for muted if (italic) { styles <- c(styles, "font-style: italic") } if (underline) { styles <- c(styles, "text-decoration: underline") } # Generate HTML span if (length(styles) > 0) { paste0( '', x, "" ) } else { x } }, "plain" = x ) # /switch if (pad > 0L) { out <- paste0(strrep(" ", pad), out) } out } # /rtemis::fmt # %% highlight ---- #' Highlight text #' #' A `fmt()` convenience wrapper for highlighting text. #' #' @param x Character: Text to highlight. #' @param pad Integer: Number of spaces to pad before text. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with highlight. #' #' @author EDG #' @keywords internal #' @noRd highlight <- function( x, pad = 0L, output_type = c("ansi", "html", "plain") ) { fmt(x, col = highlight_col, bold = TRUE, pad = pad, output_type = output_type) } # /rtemis::highlight # %% highlight2 ---- highlight2 <- function( x, output_type = c("ansi", "html", "plain") ) { fmt(x, col = highlight2_col, bold = FALSE, output_type = output_type) } # /rtemis::highlight2 # %% bold ---- #' Make text bold #' #' A `fmt()` convenience wrapper for making text bold. #' #' @param text Character: Text to make bold #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with bold styling #' #' @author EDG #' @keywords internal #' @noRd bold <- function(text, output_type = c("ansi", "html", "plain")) { fmt(text, bold = TRUE, output_type = output_type) } # /rtemis::bold # %% italic ---- #' Make text italic #' #' A `fmt()` convenience wrapper for making text italic. #' #' @param text Character: Text to make italic #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with italic styling #' #' @author EDG #' @keywords internal #' @noRd italic <- function(text, output_type = c("ansi", "html", "plain")) { fmt(text, italic = TRUE, output_type = output_type) } # /rtemis::italic # %% underline ---- #' Make text underlined #' #' A `fmt()` convenience wrapper for making text underlined. #' #' @param text Character: Text to underline #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with underline styling #' #' @author EDG #' @keywords internal #' @noRd underline <- function(text, output_type = c("ansi", "html", "plain")) { fmt(text, underline = TRUE, output_type = output_type) } # /rtemis::underline # %% thin ---- #' Make text thin/light #' #' A `fmt()` convenience wrapper for making text thin/light. #' #' @param text Character: Text to make thin #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with thin/light styling #' #' @author EDG #' @keywords internal #' @noRd thin <- function(text, output_type = c("ansi", "html", "plain")) { fmt(text, thin = TRUE, output_type = output_type) } # /rtemis::thin # %% muted ---- #' Muted text #' #' A `fmt()` convenience wrapper for making text muted. #' #' @param x Character: Text to format #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with muted styling #' #' @author EDG #' @keywords internal #' @noRd muted <- function(x, output_type = c("ansi", "html", "plain")) { fmt(x, muted = TRUE, output_type = output_type) } # /rtemis::muted # %% gray ---- #' Gray text #' #' A `fmt()` convenience wrapper for making text gray. #' #' @param x Character: Text to format #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with gray styling #' #' @details #' Can be useful in contexts where muted is not supported. #' #' @author EDG #' @keywords internal #' @noRd gray <- function(x, output_type = c("ansi", "html", "plain")) { fmt(x, col = "#808080", output_type = output_type) } # /rtemis::gray # %% col256 ---- #' Apply 256-color formatting #' #' @param text Character: Text to color #' @param col Character or numeric: Color (ANSI 256-color code, hex for HTML) #' @param bg Logical: If TRUE, apply as background color #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with 256-color styling #' #' @author EDG #' @keywords internal #' @noRd col256 <- function( text, col = "79", bg = FALSE, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) switch( output_type, "ansi" = { if (bg) { paste0("\033[48;5;", col, "m", text, "\033[0m") } else { paste0("\033[38;5;", col, "m", text, "\033[0m") } }, "html" = { # Convert ANSI color codes to hex colors if needed hex_col <- if ( is.numeric(col) || (is.character(col) && !grepl("^#", col)) ) { ansi256_to_hex(col) } else { col } if (bg) { paste0( '', text, "" ) } else { paste0('', text, "") } }, "plain" = text ) } # /rtemis::col256 # %% ansi256_to_hex ---- #' Convert ANSI 256 color code to HEX #' #' @param code Integer: ANSI 256 color code (0-255). #' #' @return Character: HEX color string. #' #' @author EDG #' @keywords internal #' @noRd ansi256_to_hex <- function(code) { code <- as.integer(code) if (is.na(code) || code < 0 || code > 255) { return("#000000") # Return black for invalid codes } # Standard and high-intensity colors (0-15) if (code < 16) { return(c( "#000000", "#cd0000", "#00cd00", "#cdcd00", "#0000ee", "#cd00cd", "#00cdcd", "#e5e5e5", "#7f7f7f", "#ff0000", "#00ff00", "#ffff00", "#5c5cff", "#ff00ff", "#00ffff", "#ffffff" )[code + 1]) } # 6x6x6 color cube (16-231) if (code >= 16 && code <= 231) { code <- code - 16 r <- floor(code / 36) g <- floor((code %% 36) / 6) b <- code %% 6 levels <- c(0, 95, 135, 175, 215, 255) # xterm levels return(grDevices::rgb( levels[r + 1], levels[g + 1], levels[b + 1], maxColorValue = 255 )) } # Grayscale ramp (232-255) gray_level <- (code - 232) * 10 + 8 grDevices::rgb( gray_level, gray_level, gray_level, maxColorValue = 255 ) } # /rtemis::ansi256_to_hex # %% fmt_gradient ---- #' Gradient text #' #' @param x Character: Text to colorize. #' @param colors Character vector: Colors to use for the gradient. #' @param bold Logical: If TRUE, make text bold. #' @param space Character {"rgb", "Lab"}: Color space for gradient interpolation. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Text with gradient color applied. #' #' @author EDG #' @keywords internal #' @noRd fmt_gradient <- function( x, colors, bold = FALSE, space = "Lab", output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) if (output_type == "plain") { return(x) } # Split text into individual characters chars <- strsplit(x, "")[[1]] n_chars <- length(chars) if (n_chars <= 1) { # For single character or empty string, use first color return(fmt(x, col = colors[1], output_type = output_type)) } # Generate gradient colors using colorRampPalette tryCatch( { gradient_colors <- grDevices::colorRampPalette(colors, space = space)( n_chars ) }, error = function(e) { warning("Invalid gradient colors, using default") x } ) # Apply gradient colors to each character gradient_chars <- character(n_chars) for (i in seq_len(n_chars)) { gradient_chars[i] <- fmt( chars[i], col = gradient_colors[i], bold = bold, output_type = output_type ) } # Combine all colored characters paste(gradient_chars, collapse = "") } # /rtemis::fmt_gradient # %% map_value_to_color ---- #' Map numeric value to color #' #' Maps a numeric value to a color based on a specified range and color palette using `fmt` #' for formatting. Useful for visualizing numeric values in text output. #' #' @param x Numeric: Value to map to a color. #' @param range Numeric vector of length 2: Minimum and maximum values for mapping. #' @param colors Character vector: Colors to use for the gradient mapping. #' @param space Character {"rgb", "Lab"}: Color space for gradient interpolation. #' @param bold Logical: If TRUE, make text bold. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted text with color corresponding to the numeric value. #' #' @author EDG #' @keywords internal #' @noRd map_value_to_color <- function( x, range = c(0, 1), colors = c("#ff9f20", "#00b2b2"), space = "Lab", bold = TRUE, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) if (output_type == "plain") { return(as.character(x)) } if (!is.numeric(x) || length(x) != 1L || is.na(x)) { cli::cli_abort("`x` must be a single non-missing numeric value.") } if (!is.numeric(range) || length(range) != 2L || anyNA(range)) { cli::cli_abort( "`range` must be a numeric vector of length 2 with no missing values." ) } if (range[1] >= range[2]) { cli::cli_abort("`range[1]` must be strictly less than `range[2]`.") } if (!is.character(colors) || length(colors) < 2L || anyNA(colors)) { cli::cli_abort( "`colors` must be a character vector of at least 2 non-missing colors." ) } # Check x is within range if (x < range[1] || x > range[2]) { cli::cli_abort( "Value {x} is out of range [{range[1]}, {range[2]}]" ) } n_colors <- 256L gradient <- tryCatch( { grDevices::colorRampPalette(colors, space = space)(n_colors) }, error = function(e) { cli::cli_abort("Invalid `colors` specification.") } ) p <- (x - range[1]) / (range[2] - range[1]) idx <- as.integer(round(p * (n_colors - 1L))) + 1L idx <- max(1L, min(n_colors, idx)) fmt( as.character(x), col = gradient[idx], bold = bold, output_type = output_type ) } # /rtemis::map_value_to_color ================================================ FILE: R/ifw.R ================================================ # ifw.R # ::rtemis:: # 2025- EDG rtemis.org #' Inverse Frequency Weighting #' #' @param y Vector: Outcome #' @param type Character: "case_weights" or "class_weights". What to return. #' @param verbosity Integer: Verbosity level. #' #' @return Numeric vector of weights. #' #' @keywords internal #' @noRd #' @author EDG #' #' @examples #' y <- factor(sample(c("A", "B"), size = 100, replace = TRUE, prob = c(.1, .9))) #' ifw(y) #' ifw(y, type = "class_weights") ifw <- function(y, type = c("case_weights", "class_weights"), verbosity = 1L) { stopifnot(is.factor(y)) type <- match.arg(type) if (verbosity > 0L) { msg( "Calculating", sub("_", " ", type), "using Inverse Frequency Weighting." ) } # Class weights ---- inverse_proportions <- 1 / (table(y) / NROW(y)) class_weights <- structure( as.numeric(inverse_proportions / min(inverse_proportions)), names = names(inverse_proportions) ) if (type == "class_weights") { out <- class_weights stopifnot(length(out) == nlevels(y)) } else { out <- class_weights[as.integer(y)] stopifnot(length(out) == length(y)) } out } # /rtemis::ifw ================================================ FILE: R/massGLM.R ================================================ # massGLM.R # ::rtemis:: # 2021- EDG rtemis.org #' Mass-univariate GLM Analysis #' #' @param x tabular data: Predictor variables. Usually a small number of covariates. #' @param y data.frame or similar: Each column is a different outcome. The function will train one #' GLM for each column of `y`. Usually a large number of features. #' @param scale_y Logical: If TRUE, scale each column of `y` to have mean 0 and sd 1. If `NULL`, #' defaults to TRUE if `y` is numeric, FALSE otherwise. #' @param center_y Logical: If TRUE, center each column of `y` to have mean 0. If `NULL`, defaults #' to TRUE if `scale_y` is TRUE, FALSE otherwise. # @param include_anova Logical: If TRUE, include ANOVA results in the summary. #' @param verbosity Integer: Verbosity level. #' #' @return `MassGLM` object. #' #' @author EDG #' @export #' #' @examples #' set.seed(2022) #' y <- rnormmat(500, 40, return_df = TRUE) #' x <- data.frame( #' x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500), #' x2 = y[[21]] + rnorm(500) #' ) #' massmod <- massGLM(x, y) #' # Print table of coefficients, p-values, etc. for all models #' summary(massmod) massGLM <- function( x, y, scale_y = NULL, center_y = NULL, # include_anova = TRUE, verbosity = 1L ) { # Init ---- start_time <- intro(verbosity = verbosity) # Check y ---- # all y columns must be numeric or all factors with 2 levels y_class <- sapply(y, class) if (y_class[1] == "numeric") { # Check all are numeric if (!all(y_class == "numeric")) { cli::cli_abort( "All columns of y must be the same type: either numeric or factors with 2 levels" ) } .family <- "gaussian" } else if (y_class[1] == "factor") { n_levels <- sapply(y, nlevels) if (!all(n_levels == 2)) { cli::cli_abort("All factor columns of y must have 2 levels") } .family <- "binomial" } else { cli::cli_abort( "All columns of y must be either numeric or factors with 2 levels. Found: {.val {y_class}}" ) } # Preprocessing ---- if (is.null(scale_y)) { scale_y <- if (y_class[1] == "numeric") { TRUE } else { FALSE } } if (is.null(center_y)) { center_y <- if (scale_y) { TRUE } else { FALSE } } if (scale_y || center_y) { y <- preprocess( y, config = setup_Preprocessor(scale = scale_y, center = center_y), verbosity = verbosity )[["preprocessed"]] } # Data ---- xnames <- colnames(x) ynames <- colnames(y) dat <- data.table(x, y) # fit1: Loop function ---- fit1 <- function(index, dat, family, ynames) { formula1 <- as.formula(paste( ynames[index], "~", paste(xnames, collapse = " + ") )) mod1 <- glm(formula1, family = family, data = dat) glm2table(list(mod1), xnames = ynames[index], include_anova = NA) } # Fit models ---- if (verbosity > 0L) { msg( "Fitting", highlight(length(ynames)), "GLMs of family", bold(.family), "with", highlight(length(xnames)), ngettext(length(xnames), "predictor", "predictors"), "each..." ) } tbls <- lapply( cli::cli_progress_along(seq_along(y), name = "GLMs", type = "tasks"), function(i) { fit1(index = i, dat = dat, family = .family, ynames = ynames) } ) tbl <- rbindlist(tbls) # MassGLM ---- # ynames should be the same as tbl[["Variable"]] # <> Check in MassGLM constructor if (!all(ynames == tbl[["Variable"]])) { cli::cli_warn(c( "The names of the outcome variables in y ({.val ynames}) do not match the names in the summary table ({.val summary[['Variable']]})", "Check the summary table." )) } outro(start_time) MassGLM( summary = tbl, ynames = ynames, xnames = xnames, coefnames = gsub("Coefficient_", "", getnames(tbl, "Coefficient")), family = .family ) } # /rtemis::massGLM ================================================ FILE: R/metrics.R ================================================ # metrics.R # ::rtemis:: # 2019- EDG rtemis.org #' Error functions #' #' Convenience functions for calculating loss. #' These can be passed as arguments to learners that support custom loss functions. #' #' @rdname error #' @param x Vector of True values #' @param y Vector of predicted values #' @param na.rm Logical: If TRUE, remove NA values before computation. #' #' @author EDG #' @keywords internal #' @noRd mae <- function(x, y, na.rm = TRUE) { error <- x - y mean(abs(error), na.rm = na.rm) } # /rtemis::mae #' @rdname error #' @keywords internal #' @noRd mse <- function(x, y, na.rm = TRUE) { error <- x - y mean(error^2, na.rm = na.rm) } # /rtemis::mse #' Weighted MSE #' #' @rdname error #' @keywords internal #' @noRd msew <- function(x, y, weights = rep(1, length(y)), na.rm = TRUE) { error <- x - y error <- error * weights mean(error^2, na.rm = na.rm) } # /rtemis::msew #' @rdname error #' @keywords internal #' @noRd rmse <- function(x, y, na.rm = TRUE) { sqrt(mse(x, y, na.rm = na.rm)) } # /rtemis::rmse #' R-squared #' #' @param x Float, vector: True values #' @param y Float, vector: predicted values #' @author EDG #' @keywords internal #' @noRd rsq <- function(x, y) { SSE <- sum((x - y)^2) # Sum of Squares due to Regression (SSR) a.k.a. Explained Sum of Squares (ESS) # SSR <- sum((mean(x) - y)^2) # Total Sum of Squares (TSS or SST) SST <- sum((x - mean(x))^2) # R-squared a.k.a. Coefficient of Determination i.e. percent variance explained 1 - (SSE / SST) } # /rtemis::rsq #' Log Loss for a binary classifier #' #' @param true_int Integer vector, {0, 1}: True labels (1 is the positive class). #' @param predicted_prob Float, vector: predicted probabilities. #' @param eps Float: Small value to prevent log(0). #' #' @author EDG #' @keywords internal #' @noRd logloss <- function(true_int, predicted_prob, eps = 1e-16) { predicted_prob <- pmax(pmin(predicted_prob, 1 - eps), eps) -mean( true_int * log(predicted_prob) + (1 - true_int) * log(1 - predicted_prob) ) } # /rtemis::logloss #' Sensitivity #' #' The first factor level is considered the positive case. #' #' @param true Factor: True labels. #' @param predicted Factor: Predicted labels. #' @param harmonize Logical: If TRUE, run `factor_harmonize` first. #' @param verbosity Integer: Verbosity level. #' #' @author EDG #' @keywords internal #' @noRd sensitivity <- function(true, predicted, harmonize = FALSE, verbosity = 1L) { if (harmonize) { predicted <- factor_harmonize(true, predicted, verbosity = verbosity) } pos_index <- true == levels(true)[1] condition_pos <- sum(pos_index) true_pos <- sum(true[pos_index] == predicted[pos_index]) true_pos / condition_pos } #' Specificity #' #' The first factor level is considered the positive case. #' #' @param true True labels #' @param predicted predicted labels #' @param harmonize Logical: If TRUE, run `factor_harmonize` first #' @param verbosity Integer: Verbosity level. #' #' @keywords internal #' @noRd specificity <- function(true, predicted, harmonize = FALSE, verbosity = 1L) { if (harmonize) { predicted <- factor_harmonize(true, predicted, verbosity = verbosity) } neg_index <- true == levels(true)[2] condition_neg <- sum(neg_index) true_neg <- sum(true[neg_index] == predicted[neg_index]) true_neg / condition_neg } #' Balanced Accuracy #' #' Balanced Accuracy of a binary classifier #' #' BAcc = .5 * (Sensitivity + Specificity) #' #' @param true Factor: True labels. #' @param predicted Factor: Predicted labels. #' @param harmonize Logical: passed to `sensitivity()` and `specificity`, which use `factor_harmonize`. #' @param verbosity Integer: Verbosity level. #' #' @keywords internal #' @noRd bacc <- function(true, predicted, harmonize = FALSE, verbosity = 1L) { 0.5 * (sensitivity( true, predicted, harmonize = harmonize, verbosity = verbosity ) + specificity( true, predicted, harmonize = harmonize, verbosity = verbosity )) } # /rtemis::bacc #' Precision (aka PPV) #' #' The first factor level is considered the positive case. #' #' @param true Factor: True labels #' @param predicted Factor: predicted labels #' @param harmonize Logical: If TRUE, run `factor_harmonize` first #' @param verbosity Integer: Verbosity level. #' #' @keywords internal #' @noRd precision <- function(true, predicted, harmonize = FALSE, verbosity = 1L) { if (harmonize) { predicted <- factor_harmonize(true, predicted, verbosity = verbosity) } tbl <- table(predicted, true) predicted_totals <- rowSums(tbl)[1] hits <- diag(tbl)[1] if (hits == 0 && predicted_totals == 0) { 1 } else { hits / predicted_totals } } # /rtemis::precision #' Factor harmonize #' #' @param reference Reference factor. #' @param x Input factor. #' @param verbosity Integer: Verbosity level. #' #' @return Factor: x with levels in the same order as reference. #' #' @author EDG #' #' @keywords internal #' @noRd factor_harmonize <- function(reference, x, verbosity = 1L) { if (!is.factor(x) || !is.factor(reference)) { cli::cli_abort("Inputs must be factors") } if (!all(levels(x) == levels(reference))) { if (!all(levels(x) %in% levels(reference))) { if (verbosity > 0L) { msg("Levels of x:") } levels(x) if (verbosity > 0L) { msg("levels of reference:") } levels(reference) cli::cli_abort("Levels of two inputs do not match") } if (verbosity > 0L) { msg("Input factor levels are not in the same order, correcting") } x <- factor(x, levels = levels(reference)) } x } # /rtemis::factor_harmonize #' F1 score #' #' Calculate the F1 score for classification: #' #' \deqn{F1 = 2 \frac{Recall \cdot Precision}{Recall + Precision}}{F1 = 2 * (Recall * Precision)/(Recall + Precision)} #' #' @param recall Float \[0, 1\]: Recall a.k.a. Sensitivity #' @param precision Float \[0, 1\]: Precision a.k.a. Positive Predictive Value #' #' @author EDG #' @keywords internal #' @noRd f1 <- function(precision, recall) { 2 * (recall * precision) / (recall + precision) } # /rtemis::f1 # auc.R # ::rtemis:: # 2019-23 EDG rtemis.org #' Area under the ROC Curve #' #' Get the Area under the ROC curve to assess classifier performance. #' #' @param true_int Integer vector: True labels of outcomes (e.g. c(0, 1, 1)) #' @param predicted_prob Numeric Vector: Probabilities or model scores #' (e.g. c(.32, .75, .63), etc) #' @param method Character {"lightAUC" "ROCR"}: Package to use. #' @param verbosity Integer: Verbosity level. #' #' @return Numeric. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' preds <- c(0.7, 0.55, 0.45, 0.25, 0.6, 0.7, 0.2) #' labels <- 2L - as.integer(factor(c("a", "a", "a", "b", "b", "b", "b"))) #' auc(labels, preds, method = "lightAUC") #' auc(labels, preds, method = "ROCR") auc <- function( true_int, predicted_prob, method = c("lightAUC", "ROCR"), verbosity = 0L ) { # Checks ---- method <- match.arg(method) check_inherits(true_int, "integer") check_float01inc(predicted_prob) # method <- match.arg(method) if (length(unique(true_int)) == 1) { return(NaN) } if (method == "lightAUC") { check_dependencies("lightAUC") auc. <- lightAUC::lightAUC(probs = predicted_prob, actuals = true_int) } else if (method == "ROCR") { check_dependencies("ROCR") .pred <- try(ROCR::prediction( predicted_prob, true_int, label.ordering = NULL )) auc. <- try(ROCR::performance(.pred, "auc")@y.values[[1]]) } if (inherits(auc., "try-error")) { auc. <- NaN } if (verbosity > 0L) { msg("AUC =", auc.) } auc. } # /rtemis::auc #' Area under the Curve by pairwise concordance #' #' Get the Area under the ROC curve to assess classifier performance using pairwise concordance #' #' The first level of `true.labels` must be the positive class, and high numbers in #' `estimated.score` should correspond to the positive class. #' #' @param estimated.score Float, Vector: Probabilities or model scores #' (e.g. c(.32, .75, .63), etc) #' @param true.labels True labels of outcomes (e.g. c(0, 1, 1)) #' @param verbosity Integer: Verbosity level. #' #' @examples #' true.labels <- factor(c("a", "a", "a", "b", "b", "b", "b")) #' estimated.score <- c(0.7, 0.55, 0.45, 0.25, 0.6, 0.7, 0.2) #' auc_pairs(estimated.score, true.labels, verbosity = 1L) #' #' @keywords internal #' @noRd auc_pairs <- function(estimated.score, true.labels, verbosity = 1L) { true.labels <- as.factor(true.labels) true.levels <- levels(true.labels) n.levels <- length(true.levels) if (n.levels == 2) { outer.diff <- outer( estimated.score[true.labels == true.levels[1]], estimated.score[true.labels == true.levels[2]], "-" ) .auc <- mean((outer.diff > 0) + .5 * (outer.diff == 0)) } else { cli::cli_abort( "Multiclass AUC does not have a unique definition and is not yet implemented" ) } if (verbosity > 0L) { msg("Positive class:", true.levels[1]) msg("AUC =", .auc) } invisible(.auc) } # /rtemis::auc_pairs #' Brier_Score #' #' Calculate the Brier_Score for classification: #' #' \deqn{BS = \frac{1}{N} \sum_{i=1}^{N} (y_i - p_i)^2}{BS = 1/N * sum_{i=1}^{N} (y_i - p_i)^2} #' #' @param true_int Integer vector, {0, 1}: True labels #' @param predicted_prob Numeric vector, \[0, 1\]: predicted probabilities #' #' @author EDG #' @keywords internal #' @noRd brier_score <- function(true_int, predicted_prob) { true_int <- clean_int(true_int) check_float01inc(predicted_prob) mean((true_int - predicted_prob)^2) } # /rtemis::brier_score #' Convert labels to integers #' #' Convert factor labels to integers where the positive class is 1 and the negative class is 0. #' #' @param x Factor: True labels. #' @param binclasspos Integer: Position of the factor level which is the positive class (binary classification only). #' #' @return Integer vector: 0, 1 where 1 is the positive class as defined by binclasspos. #' #' @author EDG #' @keywords internal #' @noRd labels2int <- function(x, binclasspos = 2L) { stopifnot(is.factor(x)) # Convert factor to 0, 1 where 1 is the positive class as defined by binclasspos if (binclasspos == 1L) { xi <- 2L - as.integer(x) } else { xi <- as.integer(x) - 1L } xi } # /rtemis::labels2int # classification_metrics() ---- #' Classification Metrics #' #' @details #' Note that auc_method = "pROC" is the only one that will output an AUC even if #' one or more predicted probabilities are NA. #' #' @param true_labels Factor: True labels. #' @param predicted_labels Factor: predicted values. #' @param predicted_prob Numeric vector: predicted probabilities. #' @param binclasspos Integer: Factor level position of the positive class in binary classification. #' @param calc_auc Logical: If TRUE, calculate AUC. May be slow in very large datasets. #' @param calc_brier Logical: If TRUE, calculate Brier_Score. #' @param auc_method Character: "lightAUC", "pROC", "ROCR". #' @param sample Character: Sample name. #' @param verbosity Integer: Verbosity level. #' #' @return `ClassificationMetrics` object. #' #' @author EDG #' @export #' #' @examples #' # Assume positive class is "b" #' true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) #' predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a")) #' predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) #' #' classification_metrics(true_labels, predicted_labels, predicted_prob) #' classification_metrics(true_labels, predicted_labels, 1 - predicted_prob, binclasspos = 1L) classification_metrics <- function( true_labels, predicted_labels, predicted_prob = NULL, binclasspos = 2L, calc_auc = TRUE, calc_brier = TRUE, auc_method = "lightAUC", sample = character(), verbosity = 0L ) { # Checks ---- # Binary class probabilities only for now if (length(predicted_prob) > length(true_labels)) { predicted_prob <- NULL } n_classes <- nlevels(true_labels) # Check same levels in if (!all(levels(true_labels) == levels(predicted_labels))) { cli::cli_abort( "True and predicted labels must have the same levels, in the same order.", "\n levels(true_labels): ", paste(levels(true_labels), collapse = ", "), "\nlevels(predicted_labels): ", paste(levels(predicted_labels), collapse = ", ") ) } # Positive class ---- # For confusion table, make positive class the first factor level if (n_classes == 2 && binclasspos == 2L) { true_labels <- factor(true_labels, levels = rev(levels(true_labels))) predicted_labels <- factor( predicted_labels, levels = rev(levels(predicted_labels)) ) } true_levels <- levels(true_labels) # Levels already set so that the first level is the positive class Positive_Class <- if (n_classes == 2) true_levels[1] else NA if (verbosity > 0L) { if (n_classes == 2) { msg( "There are two outcome classes:", highlight(paste(rev(true_levels), collapse = ", ")) ) msg(" The positive class is:", highlight(Positive_Class)) } else { msg( "There are", n_classes, "classes:", highlight(paste(rev(true_levels), collapse = ", ")) ) } } tbl <- table(true_labels, predicted_labels) # attr(tbl, "dimnames") <- list(Reference = true_levels, Predicted = true_levels) names(attributes(tbl)[["dimnames"]]) <- c("Reference", "Predicted") Class <- list() Overall <- list() Class[["Totals"]] <- rowSums(tbl) Class[["Predicted_totals"]] <- colSums(tbl) Total <- sum(tbl) Class[["Hits"]] <- diag(tbl) # Class[["Misses"]] <- Class[["Totals"]] - Class[["Hits"]] Class[["Sensitivity"]] <- Class[["Hits"]] / Class[["Totals"]] Class[["Condition_negative"]] <- Total - Class[["Totals"]] Class[["True_negative"]] <- Total - Class[["Predicted_totals"]] - (Class[["Totals"]] - Class[["Hits"]]) Class[["Specificity"]] <- Class[["True_negative"]] / Class[["Condition_negative"]] Class[["Balanced_Accuracy"]] <- .5 * (Class[["Sensitivity"]] + Class[["Specificity"]]) # PPV = true positive / predicted condition positive Class[["PPV"]] <- Class[["Hits"]] / Class[["Predicted_totals"]] # NPV = true negative / predicted condition negative Class[["NPV"]] <- Class[["True_negative"]] / (Total - Class[["Predicted_totals"]]) Class[["F1"]] <- 2 * (Class[["PPV"]] * Class[["Sensitivity"]]) / (Class[["PPV"]] + Class[["Sensitivity"]]) # Binary vs Multiclass ---- if (n_classes == 2) { Overall[["Sensitivity"]] <- Class[["Sensitivity"]][1] Overall[["Specificity"]] <- Class[["Specificity"]][1] Overall[["Balanced_Accuracy"]] <- Class[["Balanced_Accuracy"]][1] Overall[["PPV"]] <- Class[["PPV"]][1] Overall[["NPV"]] <- Class[["NPV"]][1] Overall[["F1"]] <- Class[["F1"]][1] } else { Overall[["Balanced_Accuracy"]] <- mean(Class[["Sensitivity"]]) Overall[["F1"]] <- mean(Class[["F1"]]) } Overall[["Accuracy"]] <- sum(Class[["Hits"]]) / Total # Probability-based metrics ---- if (!is.null(predicted_prob) && n_classes == 2L) { # Positive class has been set to first level true_int <- 2L - as.integer(true_labels) if (calc_auc) { Overall[["AUC"]] <- auc( true_int = true_int, predicted_prob = predicted_prob, method = auc_method ) } if (calc_brier) { Overall[["Brier_Score"]] <- brier_score(true_int, predicted_prob) } # Overall[["Log loss"]] <- logloss(true_int, predicted_prob) } # Outro ---- Overall <- as.data.frame(do.call(cbind, Overall)) rownames(Overall) <- "Overall" Class <- (data.frame( Sensitivity = Class[["Sensitivity"]], Specificity = Class[["Specificity"]], Balanced_Accuracy = Class[["Balanced_Accuracy"]], PPV = Class[["PPV"]], NPV = Class[["NPV"]], F1 = Class[["F1"]] )) ClassificationMetrics( sample = sample, Confusion_Matrix = tbl, Overall = Overall, Class = Class, Positive_Class = Positive_Class ) } # /rtemis::classification_metrics # regression_metrics() ---- #' Regression Metrics #' #' @param true Numeric vector: True values. #' @param predicted Numeric vector: Predicted values. #' @param na.rm Logical: If TRUE, remove NA values before computation. #' @param sample Character: Sample name (e.g. "training", "test"). #' #' @return `RegressionMetrics` object. #' #' @author EDG #' @export #' #' @examples #' true <- rnorm(100) #' predicted <- true + rnorm(100, sd = 0.5) #' regression_metrics(true, predicted) regression_metrics <- function( true, predicted, na.rm = TRUE, sample = NULL ) { RegressionMetrics( MAE = mae(true, predicted, na.rm = na.rm), MSE = mse(true, predicted, na.rm = na.rm), RMSE = rmse(true, predicted, na.rm = na.rm), Rsq = rsq(true, predicted), sample = sample ) } # /rtemis::regression_metrics ================================================ FILE: R/msg.R ================================================ # msg.R # ::rtemis:: # 2016- EDG rtemis.org #' Get current date and time #' #' @details #' used by msgdatetime, log_to_file #' #' @param datetime_format Character: Format for the date and time. #' #' @return Character: Formatted date and time. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' datetime() datetime <- function(datetime_format = "%Y-%m-%d %H:%M:%S") { format(Sys.time(), datetime_format) } #' Message datetime() #' #' @param datetime_format Character: Format for the date and time. #' #' @return Character: Formatted date and time. #' #' @author EDG #' @keywords internal #' @noRd # Used by msg(), msg0(), msgstart() msgdatetime <- function(datetime_format = "%Y-%m-%d %H:%M:%S") { message(gray(paste0(datetime(), " ")), appendLF = FALSE) } #' Info msg #' #' @author EDG #' @keywords internal #' @noRd msg_info <- function(..., format_fn = highlight2, verbosity = 1L) { msg0(..., format_fn = format_fn, caller_id = 2, verbosity = verbosity) } #' Dispatch to the registered message sink, if any #' #' Internal helper used by `msg()`, `msg0()`, `msgstart()`, `msgdone()`. #' Returns TRUE if a sink consumed the event (caller should skip the console #' output path), FALSE if no sink is registered (caller should write to console #' as usual). #' #' @param text Character: the formatted message text (no datetime prefix). #' @param caller Character or NA: calling function name from `format_caller()`. #' @param ts Character: formatted timestamp from `datetime()`. #' @param level Character: one of `"info"`, `"start"`, `"done"`. #' #' @return Logical scalar. #' #' @author EDG #' @keywords internal #' @noRd .msg_to_sink <- function(text, caller, ts, level) { sink <- live[["msg_sink"]] if (is.null(sink)) { return(FALSE) } sink(list(text = text, caller = caller, ts = ts, level = level)) TRUE } #' Format caller #' #' @author EDG #' @keywords internal #' @noRd format_caller <- function(call_stack, call_depth, caller_id, max_char = 30L) { stack_length <- length(call_stack) if (stack_length < 2) { caller <- NA } else { call_depth <- call_depth + caller_id if (call_depth > stack_length) { call_depth <- stack_length } caller <- paste( lapply( rev(seq(call_depth)[-seq(caller_id)]), function(i) rev(call_stack)[[i]][[1]] ), collapse = ">>" ) } # do.call and similar will change the call stack, it will contain the full # function definition instead of the name alone # Capture S7 method calls if (!is.na(caller) && substr(caller, 1, 8) == "`method(") { caller <- sub("`method\\(([^,]+),.*\\)`", "\\1", caller) } if (is.function(caller)) { # Try to get function name from call stack context caller <- tryCatch( { # Get the original call stack element as character call_str <- deparse(rev(call_stack)[[rev(seq(call_depth)[ -seq(caller_id) ])[1]]]) # Extract function name from the call fn_match <- regexpr("^[a-zA-Z_][a-zA-Z0-9_\\.]*", call_str) if (fn_match > 0) { regmatches(call_str, fn_match) } else { "(fn)" } }, error = function(e) "(fn)" ) } if (is.character(caller)) { if (nchar(caller) > 30) caller <- paste0(substr(caller, 1, 27), "...") } caller } # /rtemis::format_caller #' Message with provenance #' #' Print message to output with a prefix including data and time, and calling function or full #' call stack #' #' If `msg` is called directly from the console, it will print `[interactive>]` in place of #' the call stack. #' `msg0`, similar to `paste0`, is `msg(..., sep = "")` #' #' #' @param ... Message to print #' @param caller Character: Name of calling function #' @param call_depth Integer: Print the system call path of this depth. #' @param caller_id Integer: Which function in the call stack to print #' @param newline_pre Logical: If TRUE begin with a new line. #' @param newline Logical: If TRUE end with a new line. #' @param format_fn Function: Formatting function to use on the message text. #' @param sep Character: Use to separate objects in `...` #' @param verbosity Integer: Verbosity level of the message. If 0L, does not print anything and #' returns NULL, invisibly. #' #' @return If verbosity > 0L, returns a list with call, message, and date, invisibly, otherwise #' returns NULL invisibly. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' msg("Hello") msg <- function( ..., caller = NULL, call_depth = 1L, caller_id = 1L, newline_pre = FALSE, newline = TRUE, format_fn = plain, sep = " ", verbosity = 1L ) { if (verbosity < 1L) { return(invisible(NULL)) } if (is.null(caller)) { call_stack <- as.list(sys.calls()) caller <- format_caller(call_stack, call_depth, caller_id) } # / get caller txt <- Filter(Negate(is.null), list(...)) text <- paste(txt, collapse = sep) # Sink path: hand structured event to registered sink, skip console. if (.msg_to_sink(text, caller, datetime(), "info")) { return(invisible(NULL)) } if (newline_pre) { message("") } msgdatetime() message( format_fn(text), appendLF = FALSE ) if (!is.null(caller) && !is.na(caller) && nchar(caller) > 0L) { message(plain(gray(paste0(" [", caller, "]")))) } else if (newline) { message("") } } # /rtemis::msg #' @rdname msg #' #' @keywords internal #' @noRd msg0 <- function( ..., caller = NULL, call_depth = 1, caller_id = 1, newline_pre = FALSE, newline = TRUE, format_fn = plain, sep = "", verbosity = 1L ) { if (verbosity < 1L) { return(invisible(NULL)) } if (is.null(caller)) { call_stack <- as.list(sys.calls()) caller <- format_caller(call_stack, call_depth, caller_id) } txt <- Filter(Negate(is.null), list(...)) text <- paste(txt, collapse = sep) if (.msg_to_sink(text, caller, datetime(), "info")) { return(invisible(NULL)) } if (newline_pre) { message("") } msgdatetime() message( format_fn(text), appendLF = FALSE ) if (!is.null(caller) && !is.na(caller) && nchar(caller) > 0L) { message(plain(gray(paste0(" [", caller, "]")))) } else if (newline) { message("") } } # /rtemis::msg0 #' msgstart #' #' @inheritParams msg #' #' @author EDG #' @keywords internal #' @noRd msgstart <- function( ..., newline_pre = FALSE, sep = "" ) { txt <- Filter(Negate(is.null), list(...)) text <- paste(txt, collapse = sep) if (.msg_to_sink(text, NA_character_, datetime(), "start")) { return(invisible(NULL)) } if (newline_pre) { message() } msgdatetime() message(plain(text), appendLF = FALSE) } # /rtemis::msgstart #' msgdone #' #' @inheritParams msg #' #' @author EDG #' @keywords internal #' @noRd msgdone <- function(caller = NULL, call_depth = 1, caller_id = 1, sep = " ") { if (is.null(caller)) { call_stack <- as.list(sys.calls()) caller <- format_caller(call_stack, call_depth, caller_id) } if (.msg_to_sink("done", caller, datetime(), "done")) { return(invisible(NULL)) } message(" ", appendLF = FALSE) yay(end = " ") message(gray(paste0("[", caller, "]\n")), appendLF = FALSE) } # /rtemis::msgdone # %% Message sink API --------------------------------------------------------- #' Set the rtemis message sink #' #' When set, `msg()`, `msg0()`, `msgstart()`, and `msgdone()` forward their #' structured output through `sink` instead of writing to the R console. Used #' by `rtemislive` to capture training-time messages and forward them over a #' WebSocket connection. Pass `NULL` to restore default console output. #' #' The sink function is called once per message with a single argument: a list #' with fields #' #' - `text`: character. The formatted message body (no datetime prefix). #' - `caller`: character or `NA`. Calling function as identified by #' `format_caller()`. #' - `ts`: character. Formatted timestamp (`"%Y-%m-%d %H:%M:%S"`). #' - `level`: character. One of `"info"` (`msg`/`msg0`), `"start"` #' (`msgstart`), or `"done"` (`msgdone`). #' #' When a sink is set, the console output path is **skipped** for affected #' calls. Errors thrown by the sink propagate to the caller of `msg()`. #' #' @param sink Function or `NULL`. #' #' @return Previous sink (function or `NULL`), invisibly. #' #' @author EDG #' @export #' #' @seealso [get_msg_sink()], [with_msg_sink()]. #' #' @examples #' captured <- list() #' set_msg_sink(function(m) captured[[length(captured) + 1L]] <<- m) #' # msg("hello world") # would append to `captured` #' set_msg_sink(NULL) # restore console output set_msg_sink <- function(sink) { if (!is.null(sink) && !is.function(sink)) { cli::cli_abort("`sink` must be a function or NULL.") } old <- live[["msg_sink"]] live[["msg_sink"]] <- sink invisible(old) } # /rtemis::set_msg_sink #' Get the current rtemis message sink #' #' @return The currently registered sink function, or `NULL` if none is set. #' #' @author EDG #' @export #' #' @seealso [set_msg_sink()], [with_msg_sink()]. get_msg_sink <- function() { live[["msg_sink"]] } # /rtemis::get_msg_sink #' Run code with a temporary message sink #' #' Sets `sink` for the duration of `code`, restoring the previous sink on exit #' (including on error). Useful in tests and for short-lived capture. #' #' @param sink Sink function or `NULL`. #' @param code Code to run. #' #' @return The value returned by `code`. #' #' @author EDG #' @export #' #' @seealso [set_msg_sink()], [get_msg_sink()]. #' #' @examples #' captured <- list() #' with_msg_sink( #' function(m) captured[[length(captured) + 1L]] <<- m, #' { #' # any msg() / msg0() / msgstart() / msgdone() calls in here are captured #' } #' ) with_msg_sink <- function(sink, code) { old <- set_msg_sink(sink) on.exit(set_msg_sink(old), add = TRUE) force(code) } # /rtemis::with_msg_sink ================================================ FILE: R/preprocess.R ================================================ # preprocess.R # ::rtemis:: # 2017- EDG rtemis.org # %% preprocess(x, PreprocessorConfig, ...) ---- #' @name #' preprocess #' #' @param x data.frame, data.table, tbl_df (tabular data): Data to be preprocessed. #' @param config `PreprocessorConfig`: Setup using [setup_Preprocessor] OR `Preprocessor` object: #' Output of previous run of `preprocess`. This allows, for example, applying preprocessing to a #' validation or test set using the same parameters as were used for the training set. In #' particular, the same scale centers and coefficients will be applied to the new data. #' @param dat_validation tabular data: Validation set data. #' @param dat_test tabular data: Test set data. #' @param verbosity Integer: Verbosity level. #' @param ... Not used. #' #' @author EDG #' @export preprocess.class_tabular.PreprocessorConfig <- method( preprocess, list(class_tabular, PreprocessorConfig) ) <- function( x, config, dat_validation = NULL, dat_test = NULL, verbosity = 1L ) { # -> Preprocessor # Intro ---- start_time <- intro(verbosity = verbosity - 1L) # Init values list for Preprocessor output. values <- list( scale_centers = NULL, scale_coefficients = NULL, one_hot_levels = NULL, remove_features = NULL ) # Data isdatatable <- data.table::is.data.table(x) x <- as.data.frame(x) # Complete cases ---- if (config@complete_cases) { if (verbosity > 0L) { msg("Filtering complete cases...") } x <- x[complete.cases(x), ] } # Set aside excluded ---- if (!is.null(config@exclude) && length(config@exclude) > 0) { excluded <- x[, config@exclude, drop = FALSE] excluded_names <- colnames(x)[config@exclude] x <- x[, -config@exclude, drop = FALSE] } # Remove named features ---- if (!is.null(config@remove_features)) { if (verbosity > 0L) { msg("Removing", length(config@remove_features), "features...") } values$remove_features <- config@remove_features x <- x[, !names(x) %in% config@remove_features, drop = FALSE] } # Remove constants ---- # Must be ahead of numeric quantile at least if (config@remove_constants) { constant <- which(sapply( x, is_constant, skip_missing = config@remove_constants_skip_missing )) if (length(constant) > 0) { if (verbosity > 0L) { msg0( "Removing ", singorplu(length(constant), "constant feature"), "..." ) } x <- x[, -constant] } } # Remove duplicates ---- if (config@remove_duplicates) { # Ndups <- sum(duplicated(x)) duplicate_index <- which(duplicated(x)) Ndups <- length(duplicate_index) if (Ndups > 0) { if (verbosity > 0L) { msg0("Removing ", singorplu(Ndups, "duplicate case"), "...") } x <- unique(x) } } else { duplicate_index <- NULL } # Remove Cases by missing feature threshold ---- if (!is.null(config@remove_cases_thres)) { if (anyNA(x)) { xt <- data.table::as.data.table(x) # na_fraction_bycase <- apply(x, 1, function(i) sum(is.na(i))/length(i)) na_fraction_bycase <- data.table::transpose(xt)[, lapply( .SD, function(i) { sum(is.na(i)) / length(i) } )] index_remove_cases_thres <- which( na_fraction_bycase >= config@remove_cases_thres ) if (length(index_remove_cases_thres) > 0) { if (verbosity > 0L) { msg( "Removing", length(index_remove_cases_thres), "cases with >=", config@remove_cases_thres, "missing data..." ) } xt <- xt[-index_remove_cases_thres, ] } x <- as.data.frame(xt) } } # Remove Features by missing feature threshold ---- if (!is.null(config@remove_features_thres)) { if (anyNA(x)) { xt <- data.table::as.data.table(x) na.fraction.byfeat <- xt[, lapply(.SD, function(i) { sum(is.na(i)) / length(i) })] removeFeat_thres_index <- which( na.fraction.byfeat >= config@remove_features_thres ) if (length(removeFeat_thres_index) > 0) { if (verbosity > 0L) { msg( "Removing", length(removeFeat_thres_index), "features with >=", config@remove_features_thres, "missing data..." ) } x <- x[, -removeFeat_thres_index] } } } # Integer to factor ---- index_integer <- NULL if (config@integer2factor) { index_integer <- c( which(sapply(x, is.integer)), which(sapply(x, bit64::is.integer64)) ) if (verbosity > 0L) { if (length(index_integer) > 0) { msg( "Converting", singorplu(length(index_integer), "integer"), "to factor..." ) } else { msg("No integers to convert to factor...") } } for (i in index_integer) { x[, i] <- as.factor(x[, i]) } } # Logical to factor ---- if (config@logical2factor) { index_logical <- which(sapply(x, is.logical)) if (verbosity > 0L) { if (length(index_logical) > 0) { msg0( "Converting ", singorplu(length(index_logical), "logical feature"), " to ", ngettext(length(index_logical), "factor", "factors"), "..." ) } else { msg("No logicals to convert to factor...") } } for (i in index_logical) { x[, i] <- as.factor(x[, i]) } } # Numeric to factor ---- if (config@numeric2factor) { index_numeric <- which(sapply(x, is.numeric)) if (verbosity > 0L) { msg("Converting numeric to factors...") } if (is.null(config@numeric2factor_levels)) { for (i in index_numeric) { x[, i] <- as.factor(x[, i]) } } else { for (i in index_numeric) { x[, i] <- factor(x[, i], levels = config@numeric2factor_levels) } } } # Character to factor ---- if (config@character2factor) { index_char <- which(sapply(x, is.character)) if (verbosity > 0L) { if (length(index_char) > 0) { msg0( "Converting ", singorplu(length(index_char), "character feature"), " to ", ngettext(length(index_char), "a factor", "factors"), "..." ) } else { msg("No character features to convert to factors found.") } } for (i in index_char) { x[, i] <- as.factor(x[, i]) } } # unique_len2factor ---- if (config@unique_len2factor > 1) { index_len <- which(sapply( x, \(i) length(unique(i)) <= config@unique_len2factor )) # Exclude factors index_factor <- which(sapply(x, is.factor)) index_len <- setdiff(index_len, index_factor) if (verbosity > 0L) { if (length(index_len) > 0) { msg( "Converting", singorplu(length(index_len), "feature"), "with <=", config@unique_len2factor, "unique values to factors..." ) } else { msg( "No features with <=", config@unique_len2factor, "unique values found." ) } } for (i in index_len) { x[, i] <- factor(x[, i]) } } # Integer to numeric ---- if (config@integer2numeric) { if (is.null(index_integer)) { index_integer <- c( which(sapply(x, is.integer)), which(sapply(x, bit64::is.integer64)) ) } if (verbosity > 0L) { if (length(index_integer) > 0) { msg( "Converting", singorplu(length(index_integer), "integer"), "to numeric..." ) } else { msg("No integers to convert to numeric...") } } for (i in index_integer) { x[, i] <- as.numeric(x[, i]) } } # Logical to numeric ---- if (config@logical2numeric) { index_logical <- which(sapply(x, is.logical)) if (verbosity > 0L) { msg("Converting logicals to numeric...") } for (i in index_logical) { x[, i] <- as.numeric(x[, i]) } } # Numeric cut ---- if (config@numeric_cut_n > 0) { index_numeric <- which(sapply(x, is.numeric)) if (length(index_numeric) > 0) { if (verbosity > 0L) { msg("Cutting numeric features in", config@numeric_cut_n, "bins...") } for (i in index_numeric) { x[, i] <- factor( cut( x[, i], breaks = config@numeric_cut_n, labels = config@numeric_cut_labels ) ) } } } # Numeric quantile ---- if (config@numeric_quant_n > 0) { index_numeric2q <- if (config@numeric_quant_nAonly) { index_numeric2q <- which(sapply(x, is.numeric) & sapply(x, anyNA)) } else { which(sapply(x, is.numeric)) } if (length(index_numeric2q) > 0) { if (verbosity > 0L) { msg( "Cutting numeric features in", config@numeric_quant_n, "quantiles..." ) } for (i in index_numeric2q) { rng <- abs(diff(range(x[, i], na.rm = TRUE))) quantiles <- quantile( x[, i], probs = seq(0, 1, length.out = config@numeric_quant_n), na.rm = TRUE ) quantiles[1] <- quantiles[1] - .02 * rng quantiles[config@numeric_quant_n] <- quantiles[ config@numeric_quant_n ] + .02 * rng quantiles <- unique(quantiles) x[, i] <- factor( cut( x[, i], breaks = quantiles ) ) } } } # factor NA to level ---- if (config@factorNA2missing) { index_factor <- which(sapply(x, is.factor)) if (verbosity > 0L) { if (length(index_factor) > 0) { msg0( "Converting ", length(index_factor), ngettext(length(index_factor), " factor's", " factors'"), " NA values to level '", config@factorNA2missing_level, "'..." ) } else { msg("No factors found.") } } for (i in index_factor) { x[, i] <- factor_NA2missing(x[, i], config@factorNA2missing_level) } } # Factor to integer ---- # e.g. for algorithms that do not support factors directly, but can handle integers # as categorical (e.g. LightGBM) if (config@factor2integer) { index_factor <- which(sapply(x, is.factor)) if (verbosity > 0L) { if (length(index_factor) > 0) { msg( "Converting", singorplu(length(index_factor), "factor"), "to integer..." ) } else { msg("No factors found to convert to integer...") } } if (config@factor2integer_startat0) { for (i in index_factor) { x[, i] <- as.integer(x[, i]) - 1 } } else { for (i in index_factor) { x[, i] <- as.integer(x[, i]) } } } # Missingness ---- if (config@missingness) { cols_with_na <- which(apply(x, 2, anyNA)) .colnames <- colnames(x) for (i in cols_with_na) { x[, paste0(.colnames[i], "_missing")] <- factor(as.numeric(is.na(x[, i]))) if (verbosity > 0L) { msg0("Created missingness indicator for ", .colnames[i], "...") } } } # Impute ---- if (config@impute) { if (config@impute_type == "missRanger") { # '- missRanger ---- check_dependencies("missRanger") if (verbosity > 0L) { if (config@impute_missRanger_params[["pmm.k"]] > 0) { msg( "Imputing missing values using predictive mean matching with missRanger..." ) } else { msg("Imputing missing values using missRanger...") } } x <- missRanger::missRanger( x, pmm.k = config@impute_missRanger_params[["pmm.k"]], verbose = verbosity ) } else if (config@impute_type == "micePMM") { check_dependencies("mice") if (verbosity > 0L) { msg( "Imputing missing values by predictive mean matching using mice..." ) } x <- mice::complete(mice::mice(x, m = 1, method = "pmm")) } else { # '- mean/mode ---- if (verbosity > 0L) { msg( "Imputing missing values using", config@impute_discrete, "(discrete) and", config@impute_continuous, "(continuous)..." ) } index_discrete <- which(sapply(x, function(i) is_discrete(i) && anyNA(i))) if (length(index_discrete) > 0) { for (i in index_discrete) { index <- which(is.na(x[, i])) imputed <- do_call( config@impute_discrete, list(x[[i]], na.rm = TRUE) ) x[index, i] <- imputed } } index_numeric <- which(sapply(x, function(i) is.numeric(i) && anyNA(i))) if (length(index_numeric) > 0) { for (i in index_numeric) { index <- which(is.na(x[, i])) imputed <- do_call( config@impute_continuous, list(x[[i]], na.rm = TRUE) ) x[index, i] <- imputed } } } } # Scale +/- center ---- if (config@scale || config@center) { # Get index of numeric features numeric_index <- which(sapply(x, is.numeric)) sc <- if (config@scale) "Scaling" else NULL ce <- if (config@center) "centering" else NULL if (length(numeric_index) > 0) { if (verbosity > 0L) { msg( paste(c(sc, ce), collapse = " and "), length(numeric_index), "numeric features..." ) } # Info: scale outputs a matrix. scale_ <- if (!is.null(config@scale_coefficients)) { # Check names match stopifnot(identical( names(config@scale_coefficients), names(x[, numeric_index]) )) config@scale_coefficients } else { config@scale } center_ <- if (!is.null(config@scale_centers)) { # Check names match stopifnot(identical( names(config@scale_centers), names(x[, numeric_index]) )) config@scale_centers } else { config@center } x_num_scaled <- scale( x[, numeric_index, drop = FALSE], scale = scale_, center = center_ ) # Collect scale and center values values$scale_centers <- attr(x_num_scaled, "scaled:center") values$scale_coefficients <- attr(x_num_scaled, "scaled:scale") x_num_scaled <- as.data.frame(x_num_scaled) # Insert into original dataset x[, numeric_index] <- x_num_scaled # j <- 0 # for (i in numeric_index) { # j <- j + 1 # x[, i] <- x_num_scaled[, j] # } } else { msg( paste(c(sc, ce), collapse = " and "), "was requested \n but no numeric features were found: Please check data." ) } } # One Hot Encoding ---- if (config@one_hot) { x <- one_hot( x, verbosity = verbosity, factor_levels = config@one_hot_levels ) } # Add date features ---- if (config@add_date_features) { if (verbosity > 0L) { msg("Extracting date features...") } # Find date columns date_cols <- which(sapply(x, function(col) inherits(col, "Date"))) # For each date column, extract features for (i in date_cols) { .date_features <- dates2features( x[[i]], features = config@date_features ) names(.date_features) <- paste0(names(x)[i], "_", names(.date_features)) x <- cbind(x, .date_features) } } # Add holidays ---- if (config@add_holidays) { if (verbosity > 0L) { msg("Extracting holidays...") } # Find date columns date_cols <- which(sapply(x, \(col) inherits(col, "Date"))) # For each date column, extract holidays for (i in date_cols) { .holidays <- get_holidays(x[, i]) x[[paste0(names(x)[i], "_holidays")]] <- .holidays } } # Add back excluded ---- if (!is.null(config@exclude) && length(config@exclude) > 0) { # remove any duplicates if (!is.null(duplicate_index)) { excluded <- excluded[-duplicate_index, , drop = FALSE] } # remove by case thres if ( !is.null(config@remove_cases_thres) && length(index_remove_cases_thres) > 0 ) { n_feat_inc <- NCOL(x) x <- cbind(x, excluded[-index_remove_cases_thres, ]) colnames(x)[-c(seq(n_feat_inc))] <- excluded_names } else { x <- cbind(x, excluded) } } # /add back excluded if (isdatatable) { data.table::setDT(x) } if (verbosity > 0L) { msg("Preprocessing done.") } preprocessed <- list(training = x) if (!is.null(dat_validation)) { if (verbosity > 0L) { msg("Applying preprocessing to validation data...") } prp_validation <- preprocess( x = dat_validation, config = Preprocessor( config = config, preprocessed = list(), scale_centers = values[["scale_centers"]], scale_coefficients = values[["scale_coefficients"]], one_hot_levels = values[["one_hot_levels"]], remove_features = values[["remove_features"]] ), verbosity = verbosity ) preprocessed$validation <- prp_validation@preprocessed } if (!is.null(dat_test)) { if (verbosity > 0L) { msg("Applying preprocessing to test data...") } prp_test <- preprocess( x = dat_test, config = Preprocessor( config = config, preprocessed = list(), scale_centers = values[["scale_centers"]], scale_coefficients = values[["scale_coefficients"]], one_hot_levels = values[["one_hot_levels"]], remove_features = values[["remove_features"]] ), verbosity = verbosity ) preprocessed$test <- prp_test@preprocessed } outro(start_time, verbosity = verbosity - 1L) Preprocessor( config = config, preprocessed = if (length(preprocessed) == 1) { preprocessed[[1]] } else { preprocessed }, scale_centers = values[["scale_centers"]], scale_coefficients = values[["scale_coefficients"]], one_hot_levels = values[["one_hot_levels"]], remove_features = values[["remove_features"]] ) } # /rtemis::preprocess(PreprocessorConfig, ...) # %% preprocess(x, Preprocessor, ...) ---- #' @name #' preprocess #' #' @author EDG #' @export preprocess.class_tabular.Preprocessor <- method( preprocess, list(class_tabular, Preprocessor) ) <- function( x, config, verbosity = 1L ) { # -> Preprocessor params <- config@config # Overwrite scale_centers, scale_coefficients, one_hot_levels, and remove_features params@scale_centers <- config@values[["scale_centers"]] params@scale_coefficients <- config@values[["scale_coefficients"]] params@one_hot_levels <- config@values[["one_hot_levels"]] params@remove_features <- config@values[["remove_features"]] preprocess(x, params, verbosity = verbosity) } # /rtemis::preprocess(Preprocessor, ...) # %% one_hot ---- #' @name one_hot #' #' @title #' One hot encoding #' #' @description #' One hot encode a vector or factors in a data.frame #' #' @details #' A vector input will be one-hot encoded regardless of type by looking at all unique values. With data.frame input, #' only column of type factor will be one-hot encoded. #' This function is used by [preprocess]. #' `one_hot.data.table` operates on a copy of its input. #' `one_hot_` performs one-hot encoding ***in-place***. #' #' @param x Vector or data.frame #' @param xname Character: Variable name #' @param verbosity Integer: Verbosity level. #' #' @return For vector input, a one-hot-encoded matrix, for data.frame frame #' input, an expanded data.frame where all factors are one-hot encoded #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' # factor with only one unique value but 2 levels: #' vf <- factor(rep("alpha", 20), levels = c("alpha", "beta")) #' vf_one_hot <- one_hot(vf) #' vf_one_hot method(one_hot, class_any) <- function(x, xname = NULL, verbosity = 1L) { if (is.null(xname)) { xname <- deparse(substitute(x)) } # ensures if factor without all levels present, gets all columns created if (!is.factor(x)) { x <- factor(x) } .levels <- levels(x) ncases <- NROW(x) index <- as.integer(x) oh <- matrix(0, ncases, length(.levels)) colnames(oh) <- paste(xname, .levels, sep = "_") for (i in seq(ncases)) { oh[i, index[i]] <- 1 } oh } # /rtemis::one_hot.default # included for benchmarking mostly one_hotcm <- function( x, xname = deparse(substitute(x)), return = "data.frame" ) { stopifnot(is.factor(x)) dt <- data.table( ID = seq_along(x), x = x ) setnames(dt, "x", xname) out <- dcast( melt(dt, id.vars = "ID"), ID ~ variable + value, fun.aggregate = length )[, -1] if (return == "data.frame") { setDF(out) } out } # loop is faster than dcast/melt # x <- iris$Species # microbenchmark::microbenchmark(loop = one_hot.default(x), dt = one_hotcm(x)) # %% one_hot.data.frame ---- #' @rdname one_hot #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' one_hot(iris) |> head() method(one_hot, class_data.frame) <- function( x, factor_levels = NULL, verbosity = 1L ) { ncases <- NROW(x) factor_index <- which(sapply(x, is.factor)) # If factor_levels list is provided, check column names match if (!is.null(factor_levels)) { stopifnot(identical(names(factor_levels), colnames(x[, factor_index]))) } one.hot <- as.list(x) if (verbosity > 0L) { .names <- colnames(x) } for (i in factor_index) { if (verbosity > 0L) { msgstart("One hot encoding ", .names[i], "...") } .levels <- if (!is.null(factor_levels)) { factor_levels[[i]] } else { levels(x[[i]]) } index <- as.integer(x[, i]) oh <- matrix(0, ncases, length(.levels)) colnames(oh) <- paste0(names(x)[i], "_", .levels) for (j in seq(ncases)) { oh[j, index[j]] <- 1 } # Replace list element that was a factor with one-hot encoded matrix one.hot[[i]] <- oh } if (verbosity > 0L) { msgdone() } # do.call below creates a matrix, maintaining column names in one.hot matrix. # as.data.frame on one.hot would have added {name_of_oh_element}.{column_names} as.data.frame(do.call(cbind, one.hot)) } # /rtemis::one_hot.data.frame # %% one_hot.data.table ---- #' @rdname one_hot #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' ir <- data.table::as.data.table(iris) #' ir_oh <- one_hot(ir) #' ir_oh method(one_hot, class_data.table) <- function(x, verbosity = 1L) { x <- copy(x) ncases <- NROW(x) factor_index <- which(sapply(x, is.factor)) .names <- colnames(x) for (i in factor_index) { if (verbosity > 0L) { msg_info("One hot encoding ", .names[i], "...") } .levels <- levels(x[[i]]) index <- as.integer(x[[i]]) oh <- as.data.table(matrix(0, ncases, length(.levels))) .colnames <- colnames(oh) <- .levels for (k in seq_along(.levels)) { oh[index == k, (.colnames[k]) := 1] } x[, (paste(.names[i], .levels, sep = "_")) := oh] } # remove original factor(s) x[, paste(.names[factor_index]) := NULL] if (verbosity > 0L) { msg("Done") } invisible(x) } # /rtemis::one_hot.data.table #' Convert data.table's factor to one-hot encoding ***in-place*** #' #' @param x data.table: Input data.table. Will be modified ***in-place***. #' @param xname Character, optional: Dataset name. #' @param verbosity Integer: Verbosity level. #' #' @return The input, invisibly, after it has been modified ***in-place***. #' #' @author EDG #' @export #' #' @examples #' ir <- data.table::as.data.table(iris) #' # dt_set_one_hot operates ***in-place***; therefore no assignment is used: #' dt_set_one_hot(ir) #' ir dt_set_one_hot <- function(x, xname = NULL, verbosity = 1L) { if (is.null(xname)) { xname <- deparse(substitute(x)) } ncases <- NROW(x) factor_index <- which(sapply(x, is.factor)) .names <- colnames(x) for (i in factor_index) { if (verbosity > 0L) { msg_info("One hot encoding ", .names[i], "...") } .levels <- levels(x[[i]]) index <- as.numeric(x[[i]]) oh <- as.data.table(matrix(0, ncases, length(.levels))) .colnames <- colnames(oh) <- paste(xname, .levels, sep = "_") for (k in seq_along(.levels)) { oh[index == k, (.colnames[k]) := 1] } x[, (paste(.names[i], .levels, sep = "_")) := oh] } # remove original factor(s) x[, paste(.names[factor_index]) := NULL] if (verbosity > 0L) { msg("Done") } invisible(x) } # /rtemis::dt_set_one_hot #' Convert one-hot encoded matrix to factor #' #' @details If input has a single column, it will be converted to factor and #' returned #' #' @param x one-hot encoded matrix or data.frame. #' @param labels Character vector of level names. #' #' @return A factor. #' #' @author EDG #' @export #' #' @examples #' x <- data.frame(matrix(FALSE, 10, 3)) #' colnames(x) <- c("Dx1", "Dx2", "Dx3") #' x$Dx1[1:3] <- x$Dx2[4:6] <- x$Dx3[7:10] <- TRUE #' one_hot2factor(x) one_hot2factor <- function(x, labels = colnames(x)) { if (NCOL(x) == 1) { return(factor(x)) } if (any(na.exclude(rowSums(x)) > 1)) { cli::cli_abort("Input must be one-hot encoded.") } out <- factor(rep(NA, NROW(x)), levels = labels) for (i in seq_along(labels)) { out[x[, i] == 1] <- labels[i] } out } # /rtemis::one_hot2factor #' Binary matrix times character vector #' #' @param x A binary matrix or data.frame #' @param labels Character vector length equal to `ncol(x)` #' #' @return a character vector #' #' @author EDG #' @export `%BC%` <- function(x, labels) { if (NCOL(x) == 1) { return(factor(x)) } dt <- as.data.table(x) fn <- \(r) paste(unique(labels[which(r == 1)]), collapse = ",") out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))][[2]] out[out == ""] <- NA out } # /rtemis::`%BC%` #' Binary matrix to list vector #' #' @author EDG #' @keywords internal #' @noRd binmat2lvec <- function(x, labels = colnames(x), return.list = FALSE) { if (NCOL(x) == 1) { return(factor(x)) } dt <- as.data.table(x) if (return.list) { fn <- \(r) list(labels[which(r == 1)]) out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))][[2]] out[sapply(out, length) == 0] <- NA } else { fn <- \(r) paste(unique(labels[which(r == 1)]), collapse = ",") out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))] out[out == ""] <- NA } out } # /rtemis::binmat2lvec # %% feature_matrix ---- #' Convert tabular data to feature matrix #' #' Convert a tabular dataset to a matrix, one-hot encoding factors, if present. #' #' @details #' This is a convenience function that uses [features()], [preprocess()], `as.matrix()`. #' #' @param x tabular data: Input data to convert to a feature matrix. #' #' @return Matrix with features. Factors are one-hot encoded, if present. #' #' @author EDG #' @export #' #' @examples #' # reorder columns so that we have a categorical feature #' x <- set_outcome(iris, "Sepal.Length") #' feature_matrix(x) |> head() feature_matrix <- function(x) { x |> features() |> preprocess(setup_Preprocessor(one_hot = TRUE)) |> preprocessed() |> as.matrix() } # /rtemis::feature_matrix ================================================ FILE: R/present.R ================================================ # present.R # ::rtemis:: # 2025 EDG rtemis.org #' Present list of Supervised or SupervisedRes objects #' #' Plot training and testing performance boxplots of multiple `Supervised` or `SupervisedRes` objects #' #' @param x List of `Supervised` or `SupervisedRes` objects. #' @param metric Character: Metric to plot. #' @param model_names Character: Names of models being plotted. #' @param ylim Numeric vector of length 2: y-axis limits for the boxplots. #' @param theme `Theme` object. #' @param boxpoints Character: "all", "outliers", or "suspectedoutliers". Determines how points are #' displayed in the boxplot. #' @param filename Character: Filename to save the plot to. #' @param file_width Numeric: Width of the exported image in pixels. #' @param file_height Numeric: Height of the exported image in pixels. #' @param file_scale Numeric: Scale factor for the exported image. #' @param verbosity Integer: Verbosity level. #' #' @return plotly object #' #' @author EDG #' @noRd #' #' @examples #' \dontrun{ #' iris_lightrf <- train( #' iris, #' algorithm = "lightrf", #' outer_resampling_config = setup_Resampler(seed = 2026) #' ) #' iris_rsvm <- train( #' iris, #' algorithm = "radialsvm", #' outer_resampling_config = setup_Resampler(seed = 2026) #' ) #' present(list(iris_lightrf, iris_rsvm), metric = "Balanced_Accuracy") #' } method(present, class_list) <- function( x, metric = NULL, model_names = NULL, ylim = NULL, theme = choose_theme(getOption("rtemis_theme")), boxpoints = "all", filename = NULL, file_width = 800, file_height = 600, file_scale = 1, verbosity = 1L ) { # Check that all elements of x are either Supervised or SupervisedRes objects all_supervised <- all(sapply(x, function(m) { S7_inherits(m, Supervised) })) all_supervisedres <- all(sapply(x, function(m) { S7_inherits(m, SupervisedRes) })) if (!(all_supervised || all_supervisedres)) { cli::cli_abort( "Input must be a list of Supervised or SupervisedRes objects." ) } # Check all models are of the same type type <- unique(sapply(x, function(m) m@type)) if (length(type) > 1) { cli::cli_abort("All models must be of the same type") } # Describe if (verbosity > 0L) { describe(x) } # Get names if (is.null(model_names)) { model_names <- sapply(x, function(m) { m@algorithm }) } # If any names are duplicated, append a number if (any(duplicated(model_names))) { model_names <- make.unique(model_names, sep = "_") } # Metric if (is.null(metric)) { metric <- switch( type, Classification = "Balanced_Accuracy", Regression = "Rsq" ) } # Data xl_training <- lapply(x, function(m) { get_metric(m, set = "training", metric = metric) }) xl_test <- lapply(x, function(m) { get_metric(m, set = "test", metric = metric) }) names(xl_training) <- names(xl_test) <- model_names # Plots if (all_supervisedres) { # Get ylim if (is.null(ylim)) { ylim <- range(c(xl_training, xl_test), na.rm = TRUE) } plot_training <- draw_box( xl_training, ylab = labelify(paste("Training", metric)), ylim = ylim, theme = theme, boxpoints = boxpoints ) plot_test <- draw_box( xl_test, ylab = labelify(paste("Test", metric)), ylim = ylim, theme = theme, boxpoints = boxpoints ) plt <- plotly::subplot( plot_training, plot_test, nrows = 2L, shareX = TRUE, shareY = FALSE, titleX = TRUE, titleY = TRUE, margin = 0.05 ) } else { # rows are groups, columns are features xdf_training <- as.data.frame(xl_training) xdf_test <- as.data.frame(xl_test) xdf <- t(rbind(xdf_training, xdf_test)) colnames(xdf) <- c("Training", "Test") plt <- draw_bar(xdf, ylab = labelify(metric), theme = theme) } if (!is.null(filename)) { export_plotly( plt, filename = filename, width = file_width, height = file_height, scale = file_scale ) } plt } # /rtemis::present.list ================================================ FILE: R/read.R ================================================ # read.R # ::rtemis:: # 2022- EDG rtemis.org # %% read ---- #' Read tabular data from a variety of formats #' #' Read data and optionally clean column names, keep unique rows, and convert #' characters to factors #' #' @details #' `read` is a convenience function to read: #' #' - **Delimited** files using `data.table:fread()`, `arrow:read_delim_arrow()`, #' `vroom::vroom()`, or `duckdb::duckdb_read_csv()` #' - **ARFF** files using `farff::readARFF()` #' - **Parquet** files using `arrow::read_parquet()` #' - **XLSX** files using `readxl::read_excel()` #' - **DTA** files from Stata using `haven::read_dta()` #' - **FASTA** files using `seqinr::read.fasta()` #' - **RDS** files using `readRDS()` #' #' @param filename Character: filename or full path if `datadir = NULL`. #' @param datadir Character: Optional path to directory where `filename` #' is located. If not specified, `filename` must be the full path. #' @param make_unique Logical: If TRUE, keep unique rows only. #' @param character2factor Logical: If TRUE, convert character variables to #' factors. #' @param clean_colnames Logical: If TRUE, clean columns names using #' [clean_colnames]. #' @param delim_reader Character: package to use for reading delimited data. #' @param xlsx_sheet Integer or character: Name or number of XLSX sheet to read. #' @param sep Single character: field separator. If `delim_reader = "fread"` #' and `sep = NULL`, this defaults to "auto", otherwise defaults to ",". #' @param quote Single character: quote character. #' @param na_strings Character vector: Strings to be interpreted as NA values. #' For `delim_reader = "duckdb"`, this must be a single string. #' @param output Character: "default" or "data.table", If default, return the delim_reader's #' default data structure, otherwise convert to data.table. #' @param attr Character: Attribute to set (Optional). #' @param value Character: Value to set (if `attr` is not NULL). #' @param verbosity Integer: Verbosity level. #' @param fread_verbosity Integer: Verbosity level. Passed to `data.table::fread` #' @param timed Logical: If TRUE, time the process and print to console #' @param ... Additional arguments to pass to `data.table::fread`, #' `arrow::read_delim_arrow()`, `vroom::vroom()`, #' or `readxl::read_excel()`. #' #' @return data.frame, data.table, or tibble. #' #' @author EDG #' @export #' #' @examples #' \dontrun{ #' # Replace with your own data directory and filename #' datadir <- "/Data" #' dat <- read("iris.csv", datadir) #' } read <- function( filename, datadir = NULL, make_unique = FALSE, character2factor = FALSE, clean_colnames = TRUE, delim_reader = c("data.table", "vroom", "duckdb", "arrow"), xlsx_sheet = 1, sep = NULL, quote = "\"", na_strings = c(""), output = c("data.table", "tibble", "data.frame"), attr = NULL, value = NULL, verbosity = 1L, fread_verbosity = 0L, timed = verbosity > 0L, ... ) { check_dependencies("data.table") if (timed) { start_time <- intro(verbosity = 0L) } delim_reader <- match.arg(delim_reader) output <- match.arg(output) if (output == "tibble") { check_dependencies("tibble") } ext <- tools::file_ext(filename) path <- if (is.null(datadir)) { filename } else { file.path(datadir, filename) } path <- path.expand(path) # Sanitize path for security path <- sanitize_path(path, must_exist = FALSE) if (ext == "parquet") { check_dependencies("arrow") msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using arrow::read_parquet()...", verbosity = verbosity ) .dat <- arrow::read_parquet(path, ...) } else if (ext == "rds") { msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), "...", verbosity = verbosity ) .dat <- readRDS(path) } else if (ext == "xlsx") { check_dependencies("readxl") msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using readxl::read_excel()...", verbosity = verbosity ) .dat <- readxl::read_excel( path, sheet = xlsx_sheet, na = na_strings, ... ) } else if (ext == "dta") { check_dependencies("haven") msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using haven::read_dta()...", verbosity = verbosity ) .dat <- haven::read_dta(path, ...) } else if (ext == "fasta") { check_dependencies("seqinr") msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using seqinr::read.fasta()...", verbosity = verbosity ) .dat <- seqinr::read.fasta(path, ...) # if single sequence, return as character if (length(.dat) == 1) { .dat <- as.character(.dat[[1]]) } return(.dat) } else if (ext == "arff") { check_dependencies("farff") msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using farff::readARFF()...", verbosity = verbosity ) .dat <- farff::readARFF(path, ...) } else { msg0( bold(highlight("\u25B6")), " Reading ", highlight(basename(path)), " using ", delim_reader, "...", verbosity = verbosity ) if (delim_reader == "data.table") { if (is.null(sep)) { sep <- "auto" } .dat <- data.table::fread( path, sep = sep, quote = quote, na.strings = na_strings, verbose = fread_verbosity > 0L, ... ) } else if (delim_reader == "duckdb") { check_dependencies("DBI", "duckdb") if (is.null(sep)) { sep <- "," } if (length(na_strings) > 1) { msg( "Note: 'na_strings' must be a single string for duckdb; setting to '", na_strings[1], "'" ) na_strings <- na_strings[1] } con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:") on.exit(DBI::dbDisconnect(con, shutdown = TRUE), add = TRUE) duckdb::duckdb_read_csv( con, "data", path, header = TRUE, na.strings = na_strings, nrow.check = 500, delim = sep, quote = quote, ... ) .dat <- DBI::dbReadTable(con, "data") } else if (delim_reader == "arrow") { check_dependencies("arrow") if (is.null(sep)) { sep <- "," } .dat <- arrow::read_delim_arrow( path, delim = sep, quote = quote, na = na_strings, ... ) } else { check_dependencies("vroom") .dat <- vroom::vroom( path, delim = sep, quote = quote, na = na_strings, progress = verbosity > 0L, ... ) } } .nrow <- nrow(.dat) .ncol <- ncol(.dat) msg( "Read in", highlightbig(.nrow), "x", highlightbig(.ncol), verbosity = verbosity ) if (make_unique) { .dat <- unique(.dat) .nrowp <- nrow(.dat) .dup <- .nrow - .nrowp if (verbosity > 0L && .dup > 0) { msg( "Removed", bold(orange(format(.dup, big.mark = ","))), "duplicate", paste0(ngettext(.dup, "row", "rows"), ".") ) msg( "New dimensions:", highlightbig(.nrowp), "x", highlightbig(.ncol) ) } } if (clean_colnames) { setnames(.dat, names(.dat), clean_colnames(.dat)) } if (character2factor) { .dat <- preprocess( .dat, setup_Preprocessor(character2factor = TRUE) )[["preprocessed"]] } if (!is.null(attr) && !is.null(value)) { for (i in seq_len(ncol(.dat))) { setattr(.dat[[i]], attr, value) } } if (timed) { outro(start_time) } # Set output structure if (output == "data.table") { if (!is.data.table(.dat)) setDT(.dat) } else if (output == "tibble") { .dat <- tibble::as_tibble(.dat) } else if (output == "data.frame") { if (!is.data.frame(.dat)) { .dat <- as.data.frame(.dat) } else { setDF(.dat) } } .dat } # /rtemis::read ================================================ FILE: R/resample.R ================================================ # resample.R # ::rtemis:: # 2015- EDG rtemis.org #' Resample data #' #' Create resamples of your data, e.g. for model building or validation. #' "KFold" creates stratified folds, , "StratSub" creates stratified subsamples, #' "Bootstrap" gives the standard bootstrap, i.e. random sampling with replacement, #' while "StratBoot" uses StratSub and then randomly duplicates some of the training cases to #' reach original length of input (default) or length defined by `target_length`. #' #' Note that option 'KFold' may result in resamples of slightly different length. Avoid all #' operations which rely on equal-length vectors. For example, you can't place resamples in a #' data.frame, but must use a list instead. #' #' @param x Vector or data.frame: Usually the outcome; `NROW(x)` defines the sample size. #' @param config Resampler object created by [setup_Resampler]. #' @param verbosity Integer: Verbosity level. #' #' @return `Resampler` object. #' #' @author EDG #' @export #' #' @examples #' y <- rnorm(200) #' # 10-fold (stratified) #' y_10fold <- resample(y, setup_Resampler(10L, "kfold")) #' y_10fold #' # 25 stratified subsamples #' y_25strat <- resample(y, setup_Resampler(25L, "stratsub")) #' y_25strat #' # 100 stratified bootstraps #' y_100strat <- resample(y, setup_Resampler(100L, "stratboot")) #' y_100strat #' # LOOCV #' y_loocv <- resample(y, setup_Resampler(type = "LOOCV")) #' y_loocv resample <- function( x, config = setup_Resampler(), # index = NULL, # group = NULL, verbosity = 1L ) { check_is_S7(config, ResamplerConfig) # Input ---- type <- config@type if (NCOL(x) > 1) { if (survival::is.Surv(x)) { msg("Survival object will be stratified on time.", verbosity = verbosity) x <- x[, 1] } else { msg( "Input contains more than one column; stratifying on last.", verbosity = verbosity ) x <- x[[NCOL(x)]] } } # Stratify on case IDs ---- id_strat <- if (type != "LOOCV") { config@id_strat } else { NULL } if (!is.null(id_strat)) { # Only keep unique IDs idl <- !duplicated(id_strat) x <- x[idl] } if (type == "StratBoot") { target_length <- if (is.null(config@target_length)) { NROW(x) } else { config@target_length } } # resample ---- if (!type %in% c("Bootstrap", "LOOCV")) { .stratify_var <- if (is.null(config@stratify_var)) { x } else { config@stratify_var } } n_resamples <- if (type == "LOOCV") length(x) else config@n # Print config ---- if (verbosity > 1L) { print(config) } # Make resamples ---- if (type == "StratSub") { ## StratSub ---- res_part <- strat_sub( x = x, n_resamples = n_resamples, train_p = config@train_p, stratify_var = .stratify_var, strat_n_bins = config@strat_n_bins, seed = config@seed, verbosity = verbosity ) } else if (type == "Bootstrap") { ## Bootstrap ---- res_part <- bootstrap( x = x, n_resamples = n_resamples, seed = config@seed ) } else if (type == "KFold") { ## KFold ---- res_part <- kfold( x = x, k = n_resamples, stratify_var = .stratify_var, strat_n_bins = config@strat_n_bins, seed = config@seed, verbosity = verbosity ) } else if (type == "LOOCV") { ## LOOCV ---- res_part <- loocv(x = x) # Get number of resamples config@n <- length(res_part) } else if (type == "StratBoot") { ## StratBoot ---- res_part <- strat_boot( x = x, n_resamples = n_resamples, train_p = config@train_p, stratify_var = .stratify_var, strat_n_bins = config@strat_n_bins, target_length = target_length, seed = config@seed, verbosity = verbosity ) } # Update strat_n_bins ---- if (type == "StratSub" || type == "StratBoot") { actual_n_bins <- attr(res_part, "strat_n_bins") if (actual_n_bins != config@strat_n_bins) { if (verbosity > 0L) { msg0( "Updated strat_n_bins from ", config@strat_n_bins, " to ", actual_n_bins, " in ResamplerConfig object." ) } config@strat_n_bins <- actual_n_bins } } if (!is.null(id_strat)) { ### Get ID by resample ---- id_by_res <- lapply(res_part, \(x) id_strat[idl][x]) ### Get resamples on original data with replicates ---- res_part <- lapply(id_by_res, \(x) which(id_strat %in% x)) } # Output ---- Resampler(type, res_part, config) } # /rtemis::resample #' Bootstrap Resampling #' #' @param x Input vector. #' @param n_resamples Integer: Number of resamples to make. #' @param seed Integer: If provided, set seed for reproducibility. #' #' @author EDG #' #' @keywords internal #' @noRd bootstrap <- function(x, n_resamples = 10, seed = NULL) { if (!is.null(seed)) { set.seed(seed) } ids <- seq_along(x) .length <- length(x) if (!is.null(seed)) { set.seed(seed) } res <- lapply( seq(n_resamples), function(i) sort(sample(ids, .length, replace = TRUE)) ) names(res) <- paste0("Bootsrap_", seq(n_resamples)) res } # /rtemis::bootstrap #' K-fold Resampling #' #' @inheritParams resample #' @param x Input Vector. #' @param k Integer: Number of folds. #' #' @author EDG #' #' @keywords internal #' @noRd kfold <- function( x, k = 10, stratify_var = NULL, strat_n_bins = 4, seed = NULL, verbosity = TRUE ) { if (!is.null(seed)) { set.seed(seed) } if (is.null(stratify_var)) { stratify_var <- x } stratify_var <- as.numeric(stratify_var) # ->> update max.bins <- length(unique(stratify_var)) if (max.bins < strat_n_bins) { if (max.bins == 1) { cli::cli_abort("Only one unique value present in stratify_var.") } if (verbosity > 0L) { msg0("Using max n bins possible = ", max.bins, ".") } strat_n_bins <- max.bins } ids <- seq_along(x) # cuts cuts <- cut(stratify_var, breaks = strat_n_bins, labels = FALSE) cut.bins <- sort(unique(cuts)) # ids by cut idl <- lapply(seq_along(cut.bins), function(i) ids[cuts == cut.bins[i]]) # length of each cut # idl.length <- sapply(idl, length) idl.length <- as.numeric(table(cuts)) # split each idl into k folds after randomizing them idl.k <- vector("list", length(cut.bins)) for (i in seq_along(cut.bins)) { cut1 <- cut(sample(idl.length[i]), breaks = k, labels = FALSE) idl.k[[i]] <- lapply(seq(k), function(j) idl[[i]][cut1 == j]) } res <- lapply( seq(k), \(i) { seq(ids)[-sort(unlist(lapply(seq_along(cut.bins), \(j) idl.k[[j]][[i]])))] } ) names(res) <- paste0("Fold_", seq(k)) attr(res, "strat_n_bins") <- strat_n_bins res } # /rtemis::kfold #' Resample using Stratified Subsamples #' #' @inheritParams resample #' @param x Input vector #' #' @author EDG #' #' @keywords internal #' @noRd strat_sub <- function( x, n_resamples = 10, train_p = .75, stratify_var = NULL, strat_n_bins = 4, seed = NULL, verbosity = TRUE ) { if (!is.null(seed)) { set.seed(seed) } if (is.null(stratify_var)) { stratify_var <- x } stratify_var <- as.numeric(stratify_var) max.bins <- length(unique(stratify_var)) if (max.bins < strat_n_bins) { if (verbosity > 0L) { msg("Using max n bins possible =", max.bins) } strat_n_bins <- max.bins } ids <- seq_along(x) cuts <- cut(stratify_var, breaks = strat_n_bins, labels = FALSE) cut.bins <- sort(unique(cuts)) idl <- lapply(seq_along(cut.bins), function(i) ids[cuts == cut.bins[i]]) idl.length <- as.numeric(table(cuts)) res <- lapply(seq(n_resamples), function(i) { sort(unlist(sapply(seq_along(cut.bins), function(j) { sample(idl[[j]], train_p * idl.length[j]) }))) }) names(res) <- paste0("Subsample_", seq(n_resamples)) attr(res, "strat_n_bins") <- strat_n_bins res } # /rtemis::strat_sub #' Stratified Bootstrap Resampling #' #' @inheritParams resample #' @param x Input vector #' #' @author EDG #' #' @keywords internal #' @noRd strat_boot <- function( x, n_resamples = 10, train_p = .75, stratify_var = NULL, strat_n_bins = 4, target_length = NULL, seed = NULL, verbosity = TRUE ) { if (!is.null(seed)) { set.seed(seed) } res_part1 <- strat_sub( x = x, n_resamples = n_resamples, train_p = train_p, stratify_var = stratify_var, strat_n_bins = strat_n_bins, verbosity = verbosity ) # Make sure target_length was not too short by accident res.length <- length(res_part1[[1]]) if (is.null(target_length)) { target_length <- length(x) } if (target_length < res.length) { target_length <- length(x) } # Add back this many cases add.length <- target_length - res.length doreplace <- ifelse(add.length > res.length, 1, 0) res_part2 <- lapply( res_part1, function(i) sample(i, add.length, replace = doreplace) ) res <- mapply(c, res_part1, res_part2, SIMPLIFY = FALSE) res <- lapply(res, sort) names(res) <- paste0("StratBoot_", seq(n_resamples)) attr(res, "strat_n_bins") <- strat_n_bins res } # /rtemis::strat_boot #' Leave-one-out Resampling #' #' @param x Input vector #' #' @author EDG #' #' @keywords internal #' @noRd loocv <- function(x) { res <- lapply(seq(x), function(i) (seq(x))[-i]) names(res) <- paste0("Fold_", seq(res)) res } # /rtemis::loocv ================================================ FILE: R/rtemis-package.R ================================================ # rtemis-package.R # ::rtemis:: # 2015- EDG rtemis.org #' \pkg{rtemis}: Advanced Machine Learning and Visualization #' #' @description #' Advanced Machine Learning & Visualization made efficient, accessible, reproducible #' #' @section Online Documentation and Vignettes: #' #' #' @section System Setup: #' There are some options you can define in your .Rprofile (usually found in your home directory), #' so you do not have to define each time you execute a function. #' \describe{ #' \item{rtemis_theme}{General plotting theme; set to e.g. "whiteigrid" or "darkgraygrid"} #' \item{rtemis_font}{Font family to use in plots.} #' \item{rtemis_palette}{Name of default palette to use in plots. See options by running `get_palette()`} #' } #' @section Visualization: #' Graphics are handled using the `draw` family, which produces interactive plots primarily using #' `plotly` and other packages. #' #' @section Supervised Learning: #' By convention, the last column of the data is the outcome variable, and all other columns are #' predictors. Convenience function [set_outcome] can be used to move a specified column to the #' end of the data. #' Regression and Classification is performed using `train()`. #' This function allows you to preprocess, train, tune, and test models on multiple resamples. #' Use [available_supervised] to get a list of available algorithms #' #' @section Classification: #' For training of binary classification models, the outcome should be provided as a factor, #' with the *second* level of the factor being the 'positive' class. #' #' @section Clustering: #' Clustering is performed using `cluster()`. #' Use [available_clustering] to get a list of available algorithms. #' #' @section Decomposition: #' Decomposition is performed using `decomp()`. #' Use [available_decomposition] to get a list of available algorithms. #' #' @section Type Documentation: #' Function documentation includes input type (e.g. "Character", "Integer", #' "Float"/"Numeric", etc). #' When applicable, value ranges are provided in interval notation. For example, Float: [0, 1) #' means floats between 0 and 1 including 0, but excluding 1. #' Categorical variables may include set of allowed values using curly braces. #' For example, Character: \{"future", "mirai", "none"\}. #' #' @section Tabular Data: #' \pkg{rtemis} internally uses methods for efficient handling of tabular data, with support for #' `data.frame`, `data.table`, and `tibble`. If a function is documented as accepting #' "tabular data", it should work with any of these data structures. If a function is documented #' as accepting only one of these, then it should only be used with that structure. #' For example, some optimized `data.table` operations that perform in-place modifications only #' work with `data.table` objects. #' #' @name rtemis-package #' @import stats methods graphics grDevices S7 data.table htmltools #' @importFrom utils packageVersion sessionInfo getFromNamespace head tail "_PACKAGE" NULL ================================================ FILE: R/rtemis_color_system.R ================================================ #' rtemis Color System #' #' @author EDG #' #' @keywords internal #' @noRd rtemis_light_teal <- "#00fdfd" rtemis_light_blue <- "#30cefe" rtemis_teal <- "#00b2b2" kaimana_red <- "#ff004c" kaimana_blue <- "#0067e0" kaimana_light_blue <- "#479cff" coastside_orange <- "#ff9f20" rtemis_orange <- "#ff4f36" kaimana_green <- "#00ffb3" kaimana_med_green <- "#00996b" rtemis_purple <- "#6125f7" rtemis_magenta <- "#912ac8" rtemis_magenta_light <- "#b25bd6" magenta <- "#ff00ff" lmd_burgundy <- "#a92459" rtms_gray <- "#808080" rt_gray <- rtms_gray rt_red <- kaimana_red rt_blue <- kaimana_light_blue rt_green <- kaimana_med_green rt_orange <- coastside_orange rt_teal <- rtemis_teal rt_purple <- rtemis_purple rt_magenta <- rtemis_magenta_light # %% rtemis colors ---- highlight_col <- rt_orange col_object <- rt_gray # object name in repr_S7name col_info <- highlight2_col <- lmd_burgundy col_outer <- rt_green col_tuner <- rt_blue col_calibrator <- rt_magenta #' rtemis Color System #' #' A named list of colors used consistently across all packages #' in the rtemis ecosystem. #' #' Colors are provided as hex strings. #' #' @format A named list with the following elements: #' \describe{ #' \item{red}{"kaimana red"} #' \item{blue}{"kaimana light blue"} #' \item{green}{"kaimana medium green"} #' \item{orange}{"coastside orange"} #' \item{teal}{"rtemis teal"} #' \item{purple}{"rtemis purple"} #' \item{magenta}{"rtemis magenta"} #' \item{highlight_col}{"highlight color"} #' \item{object}{"rtemis teal"} #' \item{info}{"lmd burgundy"} #' \item{outer}{"kaimana red"} #' \item{tuner}{"coastside orange"} #' } #' #' @examples #' rtemis_colors[["orange"]] #' #' @author EDG #' #' @export #' #' @examples #' rtemis_colors[["teal"]] rtemis_colors <- list( red = rt_red, blue = rt_blue, green = rt_green, orange = rt_orange, teal = rt_teal, purple = rt_purple, magenta = rt_magenta, highlight_col = highlight_col, object = col_object, info = col_info, outer = col_outer, tuner = col_tuner ) # /rtemis.utils::rtemis_colors ================================================ FILE: R/theme.R ================================================ # theme.R # ::rtemis:: # EDG rtemis.org # %% Black ---- #' Themes for `draw_*` functions #' #' @param bg Color: Figure background. #' @param plot_bg Color: Plot region background. #' @param fg Color: Foreground color used as default for multiple elements like #' axes and labels, which can be defined separately. #' @param pch Integer: Point character. #' @param cex Float: Character expansion factor. #' @param lwd Float: Line width. #' @param bty Character: Box type: "o", "l", "7", "c", "u", or "]", or "n". #' @param box_col Box color if `bty != "n"`. #' @param box_alpha Float: Box alpha. #' @param box_lty Integer: Box line type. #' @param box_lwd Float: Box line width. #' @param grid Logical: If TRUE, draw grid in plot regions. #' @param grid_nx Integer: N of vertical grid lines. #' @param grid_ny Integer: N of horizontal grid lines. #' @param grid_col Grid color. #' @param grid_alpha Float: Grid alpha. #' @param grid_lty Integer: Grid line type. #' @param grid_lwd Float: Grid line width. #' @param axes_visible Logical: If TRUE, draw axes. #' @param axes_col Axes colors. #' @param tick_col Tick color. #' @param tick_alpha Float: Tick alpha. #' @param tick_labels_col Tick labels' color. #' @param tck `graphics::parr`'s tck argument: Tick length, can be negative. #' @param tcl `graphics::parr`'s tcl argument. #' @param x_axis_side Integer: Side to place x-axis. #' @param y_axis_side Integer: Side to place y-axis. #' @param labs_col Labels' color. #' @param x_axis_line Numeric: `graphics::axis`'s `line` argument for the x-axis. #' @param x_axis_las Numeric: `graphics::axis`'s `las` argument for the x-axis. #' @param x_axis_padj Numeric: x-axis' `padj`: Adjustment for the x-axis #' tick labels' position. #' @param x_axis_hadj Numeric: x-axis' `hadj`. #' @param y_axis_line Numeric: `graphics::axis`'s `line` argument for the y-axis. #' @param y_axis_las Numeric: `graphics::axis`'s `las` argument for the y-axis. #' @param y_axis_padj Numeric: y-axis' `padj`. #' @param y_axis_hadj Numeric: y-axis' `hadj`. #' @param xlab_line Numeric: Line to place `xlab`. #' @param ylab_line Numeric: Line to place `ylab`. #' @param zerolines Logical: If TRUE, draw lines on x = 0, y = 0, if within #' plot limits. #' @param zerolines_col Zerolines color. #' @param zerolines_alpha Float: Zerolines alpha. #' @param zerolines_lty Integer: Zerolines line type. #' @param zerolines_lwd Float: Zerolines line width. #' @param main_line Float: How many lines away from the plot region to draw #' title. #' @param main_adj Float: How to align title. #' @param main_font Integer: 1: Regular, 2: Bold. #' @param main_col Title color. #' @param font_family Character: Font to be used throughout plot. #' #' @return `Theme` object. #' #' @rdname theme #' @export #' #' @examples #' theme <- theme_black(font_family = "Geist") #' theme theme_black <- function( bg = "#000000", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = .2, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = .5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "black", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_black #' @rdname theme #' @export theme_blackgrid <- function( bg = "#000000", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = .2, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "blackgrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_blackgrid #' @rdname theme #' @export theme_blackigrid <- function( bg = "#000000", plot_bg = "#1A1A1A", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "blackigrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_darkgrid #' @rdname theme #' @export theme_darkgray <- function( bg = "#121212", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = .2, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = .5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "darkgray", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_darkgray #' @rdname theme #' @export theme_darkgraygrid <- function( bg = "#121212", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#404040", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "darkgraygrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_darkgraygrid #' @rdname theme #' @export theme_darkgrayigrid <- function( bg = "#121212", plot_bg = "#202020", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "transparent", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "darkgrayigrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_darkgrayigrid # %% White ---- #' @rdname theme #' @export theme_white <- function( bg = "#ffffff", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = .5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "white", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_white #' @rdname theme #' @export theme_whitegrid <- function( bg = "#ffffff", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#c0c0c0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "whitegrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_whitegrid #' @rdname theme #' @export theme_whiteigrid <- function( bg = "#ffffff", plot_bg = "#E6E6E6", fg = "#000000", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "transparent", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "whiteigrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_whiteigrid # %% Gray ---- #' @rdname theme #' @export theme_lightgraygrid <- function( bg = "#dfdfdf", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#c0c0c0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "lightgraygrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_lightgray #' @rdname theme #' @export theme_mediumgraygrid <- function( bg = "#b3b3b3", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, # box -- bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = .5, # grid -- grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#d0d0d0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, # axes -- axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = .5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = .5, y_axis_hadj = .5, xlab_line = 1.4, ylab_line = 2, # zerolines -- zerolines = TRUE, zerolines_col = fg, zerolines_alpha = .5, zerolines_lty = 1, zerolines_lwd = 1, # title -- main_line = .25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) { Theme( name = "mediumgraygrid", config = list( bg = bg, plot_bg = plot_bg, fg = fg, pch = pch, cex = cex, lwd = lwd, # box -- bty = bty, box_col = box_col, box_alpha = box_alpha, box_lty = box_lty, box_lwd = box_lwd, # grid -- grid = grid, grid_nx = grid_nx, grid_ny = grid_ny, grid_col = grid_col, grid_alpha = grid_alpha, grid_lty = grid_lty, grid_lwd = grid_lwd, # axes -- axes_visible = axes_visible, axes_col = axes_col, tick_col = tick_col, tick_alpha = tick_alpha, tick_labels_col = tick_labels_col, tck = tck, tcl = tcl, x_axis_side = x_axis_side, y_axis_side = y_axis_side, labs_col = labs_col, x_axis_line = x_axis_line, x_axis_las = x_axis_las, x_axis_padj = x_axis_padj, x_axis_hadj = x_axis_hadj, y_axis_line = y_axis_line, y_axis_las = y_axis_las, y_axis_padj = y_axis_padj, y_axis_hadj = y_axis_hadj, xlab_line = xlab_line, ylab_line = ylab_line, # zerolines -- zerolines = zerolines, zerolines_col = zerolines_col, zerolines_alpha = zerolines_alpha, zerolines_lty = zerolines_lty, zerolines_lwd = zerolines_lwd, # title -- main_line = main_line, main_adj = main_adj, main_font = main_font, main_col = main_col, font_family = font_family ) ) } # /rtemis::theme_mediumdgray #' Print available \pkg{rtemis} themes #' #' @return Called for its side effect of printing available themes. #' #' @author EDG #' @export #' #' @examples #' available_themes() available_themes <- function() { cat(highlight(" Available themes:\n")) cat(' "white", "whitegrid", "whiteigrid,\n') cat(' "black", "blackgrid", "blackigrid",\n') cat(' "darkgray", "darkgraygrid", "darkgrayigrid",\n') cat(' "lightgraygrid", "mediumgraygrid"\n') invisible() } # %% choose_theme ---- #' Select an rtemis theme #' #' @details #' If `x` is not defined, `choose_theme()` will use `getOption("rtemis_theme", "whitegrid")` to #' select the theme. This allows users to set a default theme for all rtemis plots by setting #' `options(rtemis_theme = "theme_name")` at any point. #' #' @param x Character: Name of theme to select. If not defined, will use `getOption("rtemis_theme", "whitegrid")`. #' @param override Optional List: Theme parameters to override defaults. #' #' @return `Theme` object. #' #' @author EDG #' @export #' #' @examples #' # Get default theme set by options(rtemis_theme = "theme_name"). #' # If not set, defaults to "whitegrid": #' choose_theme() #' # Get darkgraygrid theme. Same as `theme_darkgraygrid()`: #' choose_theme("darkgraygrid") #' # This will use the default theme, and override the foreground color to red: #' choose_theme(override = list(fg = "#ff0000")) choose_theme <- function( x = c( "white", "whitegrid", "whiteigrid", "black", "blackgrid", "blackigrid", "darkgray", "darkgraygrid", "darkgrayigrid", "lightgraygrid", "mediumgraygrid" ), override = NULL ) { if (length(x) > 1) { x <- getOption("rtemis_theme", "whitegrid") } if (is.null(override)) { override <- list() } do_call(paste0("theme_", x), override) } # /rtemis::theme ================================================ FILE: R/train.R ================================================ # train.R # ::rtemis:: # 2025- EDG rtemis.org # %% train ---- #' Train Supervised Learning Models #' #' @description #' Preprocess, tune, train, and test supervised learning models using nested resampling in a single #' call. #' #' @param x Tabular data, i.e. data.frame, data.table, or tbl_df (tibble): Training set data. #' @param dat_validation Tabular data: Validation set data. #' @param dat_test Tabular data: Test set data. #' @param weights Optional vector of case weights. #' @param algorithm Character: Algorithm to use. Can be left NULL, if `hyperparameters` is defined. #' @param preprocessor_config Optional PreprocessorConfig object: Setup using [setup_Preprocessor]. #' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions. #' @param tuner_config TunerConfig object: Setup using [setup_GridSearch]. #' @param outer_resampling_config Optional ResamplerConfig object: Setup using [setup_Resampler]. #' This defines the outer resampling method, i.e. the splitting into training and test sets for the #' purpose of assessing model performance. If NULL, no outer resampling is performed, in which case #' you might want to use a `dat_test` dataset to assess model performance on a single test set. #' @param execution_config `ExecutionConfig` object: Setup using [setup_ExecutionConfig]. This #' allows you to set backend ("future", "mirai", or "none"), number of workers, and future plan if #' using `backend = "future"`. #' @param question Optional character string defining the question that the model is trying to #' answer. #' @param outdir Character, optional: String defining the output directory. #' @param verbosity Integer: Verbosity level. #' @param ... Not used. #' #' @details #' **Online book & documentation** #' #' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation. #' #' **Preprocessing** #' #' There are many different stages at which preprocessing could be applied, when running a #' supervised learning pipeline with nested resampling. Some operations are best done before #' passing data to `train()`: #' #' - Duplicate rows should be removed before resampling, so that duplicates don't end up in #' different resamples, e.g. one in training and one in test. #' - Constant columns should be removed before resampling. A column may appear constant in a small #' resample, even if it is not constant in the full dataset. Removing it inconsistently will #' throw an error during prediction. #' - All data-dependent preprocessing steps need to be performed on training data only and applied #' on validation and test data, e.g. scaling, centering, imputation. #' #' User-defined preprocessing through `preprocessor_config` is applied on training set data, #' the learned parameters are stored in the returned Supervised or SupervisedRes object, and the #' preprocessing is applied on validation and test data. #' #' **Binary Classification** #' #' For binary classification, the outcome should be a factor where *the 2nd level #' corresponds to the positive class*. #' #' **Resampling** #' #' Note that you should not use an outer resampling method with #' replacement if you will also be using an inner resampling (for tuning). #' The duplicated cases from the outer resampling may appear both in the #' training and test sets of the inner resamples, leading to underestimated #' test error. #' #' **Reproducibility** #' #' If using ***outer resampling***, you can set a seed when defining `outer_resampling_config`, e.g. #' ```r #' outer_resampling_config = setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L) #' ``` #' If using ***tuning with inner resampling***, you can set a seed when defining `tuner_config`, #' e.g. #' ```r #' tuner_config = setup_GridSearch( #' resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold", seed = 2027L) #' ) #' ``` #' #' **Parallelization** #' #' There are three levels of parallelization that may be used during training: #' #' 1. Algorithm training (e.g. a parallelized learner like LightGBM) #' 2. Tuning (inner resampling, where multiple resamples can be processed in parallel) #' 3. Outer resampling (where multiple outer resamples can be processed in parallel) #' #' The `train()` function will automatically manage parallelization depending #' on: #' - The number of workers specified by the user using `n_workers` #' - Whether the training algorithm supports parallelization itself #' - Whether hyperparameter tuning is needed #' #' @return Object of class `Regression(Supervised)`, `RegressionRes(SupervisedRes)`, #' `Classification(Supervised)`, or `ClassificationRes(SupervisedRes)`. #' #' @author EDG #' @export #' #' @examples #' \donttest{ #' iris_c_lightRF <- train( #' iris, #' algorithm = "LightRF", #' outer_resampling_config = setup_Resampler(), #' ) #' } train <- function( x, dat_validation = NULL, dat_test = NULL, weights = NULL, algorithm = NULL, preprocessor_config = NULL, # PreprocessorConfig hyperparameters = NULL, # Hyperparameters tuner_config = NULL, # TunerConfig outer_resampling_config = NULL, # ResamplerConfig execution_config = setup_ExecutionConfig(), # ExecutionConfig question = NULL, outdir = NULL, verbosity = 1L, ... ) { # SuperConfigLive dispatch ---- if (S7_inherits(x, SuperConfigLive)) { return(train( x = x@dat_training, dat_validation = x@dat_validation, dat_test = x@dat_test, weights = x@weights, preprocessor_config = x@preprocessor_config, algorithm = x@algorithm, hyperparameters = x@hyperparameters, tuner_config = x@tuner_config, outer_resampling_config = x@outer_resampling_config, execution_config = x@execution_config, question = x@question, outdir = x@outdir, verbosity = x@verbosity )) } # / train.SuperConfigLive # SuperConfig dispatch ---- if (S7_inherits(x, SuperConfig)) { dat_training <- read(x@dat_training_path, character2factor = TRUE) dat_validation <- if (!is.null(x@dat_validation_path)) { read(x@dat_validation_path) } else { NULL } dat_test <- if (!is.null(x@dat_test_path)) { read(x@dat_test_path) } else { NULL } # Call train() with data and other parameters from config return(train( x = dat_training, dat_validation = dat_validation, dat_test = dat_test, weights = x@weights, preprocessor_config = x@preprocessor_config, algorithm = x@algorithm, hyperparameters = x@hyperparameters, tuner_config = x@tuner_config, outer_resampling_config = x@outer_resampling_config, execution_config = x@execution_config, question = x@question, outdir = x@outdir, verbosity = x@verbosity )) } # / train.SuperConfig # Checks ---- if (is.null(hyperparameters) && is.null(algorithm)) { cli::cli_abort( "You must define either {.arg hyperparameters} or {.arg algorithm}." ) } extra_args <- list(...) if (length(extra_args) > 0L) { cli::cli_abort( "Unused extra arguments were provided: {.val {names(extra_args)}}. Please check your function call." ) } if (is.null(algorithm) && !is.null(hyperparameters)) { algorithm <- hyperparameters@algorithm } type <- supervised_type(x) ncols <- ncol(x) if (is.null(hyperparameters) && !is.null(algorithm)) { hyperparameters <- get_default_hyperparameters( algorithm, type = type, ncols = ncols ) } if ( !is.null(algorithm) && tolower(algorithm) != tolower(hyperparameters@algorithm) ) { cli::cli_abort( "You defined algorithm to be '{algorithm}', but defined hyperparameters for {hyperparameters@algorithm}." ) } check_is_S7(hyperparameters, Hyperparameters) # Set default tuner_config if tuning is needed but none specified if (needs_tuning(hyperparameters) && is.null(tuner_config)) { tuner_config <- setup_GridSearch() } if (!is.null(tuner_config)) { check_is_S7(tuner_config, TunerConfig) } if (!is.null(preprocessor_config)) { check_is_S7(preprocessor_config, PreprocessorConfig) } # execution_config must always be set check_is_S7(execution_config, ExecutionConfig) # Override parallelization parameters with those from execution_config backend <- execution_config@backend n_workers <- execution_config@n_workers future_plan <- execution_config@future_plan # If outer_resampling_config is set, dat_validation and dat_test must be NULL if (!is.null(outer_resampling_config)) { if (!is.null(dat_validation) || !is.null(dat_test)) { cli::cli_abort( "If outer_resampling_config is set, {.arg dat_validation} and {.arg dat_test} must be NULL." ) } } if (backend == "future" && future_plan == "mirai_multisession") { future_plan <- "future.mirai::mirai_multisession" } if (!is.null(outer_resampling_config)) { check_is_S7(outer_resampling_config, ResamplerConfig) if (!is.null(outer_resampling_config[["id_strat"]])) { stopifnot(length(outer_resampling_config[["id_strat"]]) == NROW(x)) } } algorithm <- get_alg_name(algorithm) if (!is.null(outdir)) { outdir <- make_path(outdir) if (!dir.exists(outdir)) { dir.create(outdir, showWarnings = FALSE, recursive = TRUE) } if (verbosity > 1L) { msg_info("Output directory set to ", outdir, ".") } } logfile <- if (!is.null(outdir)) { paste0( outdir, "/", "train_", algorithm, "_", format(Sys.time(), "%Y%m%d.%H%M%S"), ".log" ) } else { NULL } # Start timer & logfile ---- start_time <- intro(verbosity = verbosity, logfile = logfile) # Data ---- if (type == "Classification") { classes <- levels(outcome(x)) } ## Print data summary ---- if (verbosity > 0L) { summarize_supervised( x = x, dat_validation = dat_validation, dat_test = dat_test ) } # Init ---- workers <- get_n_workers( algorithm = algorithm, hyperparameters = hyperparameters, outer_resampling_config = outer_resampling_config, n_workers = n_workers, verbosity = verbosity ) hyperparameters@n_workers <- workers[["algorithm"]] tuner <- NULL # Set backend to "none" if workers[["tuning"]] == 1L backend <- if (workers[["tuning"]] == 1L) { "none" } else { backend } # Preprocessors ---- # `preprocessor`: User-level preprocessing (Preprocessor object created from # `setup_Preprocessor`). Handles scaling, imputation, encoding, etc. # `preprocessor_internal`: Algorithm-level preprocessing (Preprocessor object # returned by each train_*() method). Handles transformations the algorithm # requires internally (e.g. factor-to-integer conversion for LightGBM). # Both are stored on the trained model so predict() can re-apply them in order: # user-level first, then algorithm-level. # Initialized to NULL here; set in the single-model path below. # In the outer resampling path, each sub-model carries its own pair. preprocessor <- preprocessor_internal <- NULL # === Outer Resampling === # Splits data into multiple training-test folds and calls train() recursively # on each. Each recursive call enters the Single Model path below (which may # itself tune via inner resampling). After all folds complete, execution falls # through to the Outer Aggregation path. if (!is.null(outer_resampling_config)) { msg0( fmt("<> ", col = col_outer, bold = TRUE), "Training ", highlight(paste(algorithm, type)), " using ", desc(outer_resampling_config), "...", verbosity = verbosity ) outer_resampler <- resample( x, config = outer_resampling_config, verbosity = verbosity ) models <- lapply( cli::cli_progress_along( seq_len(outer_resampler@config@n), name = "Training outer resamples...", type = "tasks" ), function(i) { train( x = x[outer_resampler[[i]], ], dat_test = x[-outer_resampler[[i]], ], algorithm = algorithm, preprocessor_config = preprocessor_config, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampling_config = NULL, # This model is one of the outer resamples. execution_config = execution_config, weights = if (!is.null(weights)) { weights[outer_resampler[[i]]] } else { NULL }, question = question, verbosity = verbosity - 1L ) } ) names(models) <- names(outer_resampler@resamples) hyperparameters@resampled <- 1L msg( fmt("", col = col_outer, bold = TRUE), "Outer resampling done.", verbosity = verbosity ) } # /Outer Resampling if (hyperparameters@resampled == 0L) { # === Inner path === # Trains one model: optionally tune (inner resampling) → preprocess → # train algorithm → predict → returns Supervised. # Skipped when outer resampling was performed (resampled == 1L). # Tune ---- # Inner resampling for hyperparameter optimization. if (needs_tuning(hyperparameters)) { tuner <- tune( x = x, hyperparameters = hyperparameters, tuner_config = tuner_config, preprocessor_config = preprocessor_config, weights = weights, backend = backend, future_plan = future_plan, n_workers = workers[["tuning"]], verbosity = verbosity ) # Update hyperparameters hyperparameters <- update( hyperparameters, tuner@best_hyperparameters, tuned = 1L ) } # /Tune # User-level preprocessing ---- if (!is.null(preprocessor_config)) { preprocessor <- preprocess( x = x, config = preprocessor_config, dat_validation = dat_validation, dat_test = dat_test ) x <- if (is.null(dat_validation) && is.null(dat_test)) { preprocessor@preprocessed } else { preprocessor@preprocessed[["training"]] } if (!is.null(dat_validation)) { dat_validation <- preprocessor@preprocessed[["validation"]] } if (!is.null(dat_test)) dat_test <- preprocessor@preprocessed[["test"]] } else { preprocessor <- NULL } # /User-level preprocessing # IFW ---- # Weight calculation must follow preprocessing since N cases may change. if (type == "Classification" && hyperparameters[["ifw"]]) { if (!is.null(weights)) { cli::cli_abort("Custom weights are defined, but IFW is set to TRUE.") } else { weights <- ifw(x[[ncols]], type = "case_weights", verbosity = verbosity) } } # /IFW # Train algorithm ---- if (is_tuned(hyperparameters)) { msg( "Training", highlight(paste(algorithm, type)), "with tuned hyperparameters...", verbosity = verbosity ) } else { msg0( "Training ", highlight(paste(algorithm, type)), "...", verbosity = verbosity ) } # Validation data is only passed to learners that use early stopping. # For other learners, validation metrics are collected during tuning. dat_validation_for_training <- if (algorithm %in% early_stopping_algs) { dat_validation } else { NULL } trained <- train_( hyperparameters = hyperparameters, x = x, weights = weights, dat_validation = dat_validation_for_training, execution_config = execution_config, # used by LightRuleFit verbosity = verbosity ) model <- trained[["model"]] # Algorithm-level preprocessing (e.g. factor-to-integer for LightGBM), # returned by train_*() if needed. preprocessor_internal <- trained[["preprocessor"]] # Predictions ---- predicted_prob_training <- predicted_prob_validation <- predicted_prob_test <- NULL # Re-apply algorithm-level preprocessing before predicting on each dataset. x_features <- features(x) if (!is.null(preprocessor_internal)) { x_features <- preprocess( x_features, preprocessor_internal, verbosity = 0L ) |> preprocessed() } predicted_training <- predict_super( model = model, newdata = x_features, type = type ) if (type == "Classification") { predicted_prob_training <- predicted_training predicted_training <- prob2categorical( predicted_prob_training, levels = classes ) } predicted_validation <- predicted_test <- NULL if (!is.null(dat_validation)) { dat_validation_features <- features(dat_validation) if (!is.null(preprocessor_internal)) { dat_validation_features <- preprocess( dat_validation_features, preprocessor_internal, verbosity = 0L ) |> preprocessed() } predicted_validation <- predict_super( model = model, newdata = dat_validation_features, type = type ) if (type == "Classification") { predicted_prob_validation <- predicted_validation predicted_validation <- prob2categorical( predicted_prob_validation, levels = classes ) } } if (!is.null(dat_test)) { dat_test_features <- features(dat_test) if (!is.null(preprocessor_internal)) { dat_test_features <- preprocess( dat_test_features, preprocessor_internal, verbosity = 0L ) |> preprocessed() } predicted_test <- predict_super( model = model, newdata = dat_test_features, type = type ) if (type == "Classification") { predicted_prob_test <- predicted_test predicted_test <- prob2categorical( predicted_prob_test, levels = classes ) } } # Standard Errors ---- # Use the same (algorithm-level preprocessed) features as predictions. se_training <- se_validation <- se_test <- NULL if (type == "Regression" && algorithm %in% se_compat_algorithms) { se_training <- se_super(model = model, newdata = x_features) if (!is.null(dat_validation)) { se_validation <- se_super( model = model, newdata = dat_validation_features ) } if (!is.null(dat_test)) { se_test <- se_super(model = model, newdata = dat_test_features) } } # Return Supervised ---- mod <- make_Supervised( algorithm = algorithm, model = model, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner = tuner, execution_config = execution_config, y_training = x[[ncols]], y_validation = if (!is.null(dat_validation)) dat_validation[[ncols]], y_test = if (!is.null(dat_test)) dat_test[[ncols]], predicted_training = predicted_training, predicted_validation = predicted_validation, predicted_test = predicted_test, predicted_prob_training = predicted_prob_training, predicted_prob_validation = predicted_prob_validation, predicted_prob_test = predicted_prob_test, se_training = se_training, se_validation = se_validation, se_test = se_test, xnames = names(x)[-ncols], varimp = varimp_super(model = model), question = question ) } else { # === Outer Aggregation path === # Reached after outer resampling. Each sub-model (Supervised) in `models` # carries its own preprocessor pair. Aggregate results → SupervisedRes. y_training <- lapply(models, function(mod) mod@y_training) y_test <- lapply(models, function(mod) mod@y_test) predicted_training <- lapply(models, function(mod) mod@predicted_training) predicted_test <- lapply(models, function(mod) mod@predicted_test) if (type == "Classification") { predicted_prob_training <- lapply( models, function(mod) mod@predicted_prob_training ) predicted_prob_test <- lapply( models, function(mod) mod@predicted_prob_test ) } else { predicted_prob_training <- predicted_prob_test <- NULL } # Return SupervisedRes ---- mod <- make_SupervisedRes( algorithm = algorithm, type = type, models = models, preprocessor = preprocessor, preprocessor_internal = preprocessor_internal, hyperparameters = hyperparameters, tuner_config = tuner_config, outer_resampler = outer_resampler, execution_config = execution_config, y_training = y_training, y_test = y_test, predicted_training = predicted_training, predicted_test = predicted_test, predicted_prob_training = predicted_prob_training, predicted_prob_test = predicted_prob_test, xnames = names(x)[-ncols], varimp = lapply(models, \(mod) mod@varimp), question = question ) } # Outro ---- if (verbosity > 0L) { message() print(mod) message() } if (!is.null(outdir)) { rt_save(mod, outdir = outdir, file_prefix = paste0("train_", algorithm)) } outro( start_time, logfile = logfile, verbosity = verbosity ) # Print object to logfile if (!is.null(logfile)) { cat( "\n", repr(mod, output_type = "plain"), file = logfile, append = TRUE, sep = "" ) } mod } # /rtemis::train # %% get_n_workers ---- # Function to assign number of workers to algorithm, tuning, or outer resampling # based on whether algorithm is parallelized, tuning is needed, and outer resampling is set. #' Get Number of Workers #' #' Distribute workers across different parallelization levels: algorithm training, #' tuning (inner resampling), and outer resampling. Assigns workers to the innermost #' available parallelization level to avoid over-subscription. #' #' @param algorithm Character: Algorithm name. #' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions. #' @param outer_resampling_config Optional ResamplerConfig object: Setup using [setup_Resampler]. #' @param n_workers Integer: Total number of workers you want to use. #' @param verbosity Integer: Verbosity level. #' #' @details #' The function prioritizes parallelization levels as follows: #' 1. If algorithm is parallelized (e.g., LightGBM, Ranger): all workers go to algorithm #' 2. Else if tuning is needed: all workers go to tuning (inner resampling) #' 3. Else if outer resampling is set: all workers go to outer resampling #' 4. Else: sequential execution (1 worker each) #' #' @return Named list with the number of workers for each level: #' - `algorithm`: Number of workers for algorithm training. #' - `tuning`: Number of workers for tuning (if applicable). #' - `outer_resampling_config`: Number of workers for outer resampling (if applicable). #' #' @keywords internal #' @noRd get_n_workers <- function( algorithm, hyperparameters, outer_resampling_config, n_workers, verbosity = 1L ) { # Input validation stopifnot( is.character(algorithm), length(algorithm) == 1L, is.numeric(n_workers), n_workers >= 1L, n_workers == as.integer(n_workers) ) # Check parallelization conditions is_parallelized <- algorithm %in% live[["parallelized_learners"]] requires_tuning <- needs_tuning(hyperparameters) requires_resampling <- !is.null(outer_resampling_config) # Assign workers to innermost parallelization level to avoid over-subscription if (is_parallelized) { # Parallelized algorithms get all workers, disable other parallelization workers_algorithm <- n_workers workers_tuning <- 1L workers_outer_resampling <- 1L if (verbosity > 1L && (requires_tuning || requires_resampling)) { msg( bold(algorithm), "is parallelized. Disabling tuning parallelization." ) } } else if (requires_tuning) { # Tuning gets all workers if algorithm is not parallelized workers_algorithm <- 1L workers_tuning <- n_workers workers_outer_resampling <- 1L if (requires_resampling) { msg( "Tuning parallelization enabled.", verbosity = verbosity ) } } else if (requires_resampling) { # Outer resampling gets all workers if no tuning needed workers_algorithm <- 1L workers_tuning <- 1L workers_outer_resampling <- n_workers } else { # Sequential execution workers_algorithm <- 1L workers_tuning <- 1L workers_outer_resampling <- 1L } msg0( bold("//"), " Max workers: ", highlight(n_workers), " => ", "Algorithm: ", highlight(workers_algorithm), "; Tuning: ", highlight(workers_tuning), "; Outer Resampling: ", highlight(workers_outer_resampling), verbosity = verbosity ) list( algorithm = workers_algorithm, tuning = workers_tuning, outer_resampling = workers_outer_resampling ) } # /rtemis::get_n_workers ================================================ FILE: R/train_CART.R ================================================ # train_CART.R # ::rtemis:: # 2025- EDG rtemis.org # %% train_.CARTHyperparameters ---- #' Train a CART decision tree #' #' Train a CART decision tree using `rpart`. #' #' CART does not need any special preprocessing. #' It works with numeric and factor variables and handles missing values. #' The "train_*" functions train a single model. #' Use [train] for tuning and test using nested cross-validation. #' #' @param hyperparameters `CARTHyperparameters` object: make using [setup_CART]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Not used for CART. #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, CARTHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("rpart") # Arguments ---- # Hyperparameters must be either untunable or frozen by `train` if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = TRUE, verbosity = verbosity ) if (is.null(weights)) { weights <- rep(1, NROW(x)) } # Train ---- # weights can't be NULL. # !If formula is character, the input to weights must be the unquoted column name in the data.frame # that contains weights, e.g. by doing cbind(x, weights = weights) model <- rpart::rpart( as.formula(make_formula(x)), data = x, weights = weights, control = rpart::rpart.control( minsplit = hyperparameters[["minsplit"]], minbucket = hyperparameters[["minbucket"]], cp = hyperparameters[["cp"]], maxcompete = hyperparameters[["maxcompete"]], maxsurrogate = hyperparameters[["maxsurrogate"]], usesurrogate = hyperparameters[["usesurrogate"]], surrogatestyle = hyperparameters[["surrogatestyle"]], maxdepth = hyperparameters[["maxdepth"]], xval = hyperparameters[["xval"]] ) ) # Cost-Complexity Pruning ---- if (!is.null(hyperparameters[["prune_cp"]])) { model <- rpart::prune(model, cp = hyperparameters[["prune_cp"]]) } check_inherits(model, "rpart") list(model = model, preprocessor = NULL) } # /rtemis::train_.CARTHyperparameters # %% predict_super.class_rpart ---- #' Predict from rpart model #' #' @param model rpart model. #' @param newdata tabular data: Data to predict on. #' @param type Character: Type of supervised learning ("Classification" or "Regression"). #' #' @keywords internal #' @noRd method(predict_super, class_rpart) <- function( model, newdata, type = NULL, verbosity = 0L ) { if (type == "Classification") { # Classification # predict.rpart returns a matrix n_cases x n_classes, # with classes are ordered the same as factor levels predicted_prob <- predict(model, newdata = newdata, type = "prob") # binclasspos = 2L if (NCOL(predicted_prob) == 2L) { # In binary classification, rpart returns matrix with 2 columns predicted_prob <- predicted_prob[, 2L] } predicted_prob } else { predict(model, newdata = newdata, type = "vector") } } # /rtemis::predict_super.rpart # %% varimp_super.class_rpart ---- #' Get variable importance from rpart model #' #' @param model rpart model. #' #' @keywords internal #' @noRd method(varimp_super, class_rpart) <- function(model) { vi <- model[["variable.importance"]] VariableImportance( data.table( variable = names(vi), importance = unname(vi) ) ) } # /rtemis::varimp_super.rpart ================================================ FILE: R/train_GAM.R ================================================ # train_GAM.R # ::rtemis:: # 2025 EDG rtemis.org # %% train_.GAMHyperparameters ---- #' Train a GAM model #' #' Train a GAM model using `GAM`. #' #' GAM does not work in the presence of missing values. #' #' @param hyperparameters `GAMHyperparameters` object: make using [setup_GAM]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Not used for GAM. #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, GAMHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("mgcv") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) type <- supervised_type(x) n_classes <- if (type == "Classification") { nlevels(x[, ncol(x)]) } else { NA } # Formula ---- # use s(x, k = k) for all numeric predictors index_numeric <- which(sapply(features(x), is.numeric)) spline_features <- if (length(index_numeric) > 0) { paste0( "s(", colnames(x)[index_numeric], ", k = ", hyperparameters[["k"]], ")", collapse = " + " ) } else { "" } index_factor <- which(sapply(features(x), is.factor)) categorical_features <- if (length(index_factor) > 0) { paste0( colnames(x)[index_factor], collapse = " + " ) } else { "" } formula <- as.formula( gsub( " \\+ $", "", paste( outcome_name(x), "~", gsub( "^ \\+ ", "", paste(spline_features, categorical_features, sep = " + ") ) ) ) ) # Train ---- family <- if (type == "Regression") { gaussian() } else if (type == "Classification") { if (n_classes == 2) { binomial() } else { mgcv::multinom() } } model <- mgcv::gam( formula = formula, family = family, data = x, weights = weights ) check_inherits(model, "gam") list(model = model, preprocessor = NULL) } # /rtemis::train_.GAMHyperparameters # %% predict_super.class_gam ---- #' Predict from GAM model #' #' @param model GAM model. #' @param newdata tabular data: Data to predict on. #' @param type Character: Type of supervised learning ("Classification" or "Regression"). #' #' @keywords internal #' @noRd method(predict_super, class_gam) <- function( model, newdata, type = NULL, verbosity = 0L ) { out <- predict(object = model, newdata = newdata, type = "response") if (model[["family"]][["family"]] == "binomial") { # mgvc::predict.gam returns an array of 1 dimension that causes errors during type-checking. out <- as.numeric(out) } out } # /rtemis::predict_super.gam # %% varimp_super.class_gam ---- #' Get variable importance from GAM model #' #' Variable importance for GAM is estimated as the variance of each predictor's partial effect, #' obtained via predict(model, type = "terms"). This measures each smooth term's contribution to #' the variance of the fitted values. Values are normalized to sum to one, representing each #' predictor's proportion of total predicted variance. This approach is computationally efficient #' (no refitting required) and analogous to importance measures in tree-based methods. It assumes #' approximate uncorrelatedness of partial effects, which penalized smooths tend to satisfy. For #' models with high concurvity, consider hierarchical partitioning of R² (e.g. via the gam.hp #' package) as an alternative. #' #' @param model mgcv gam model. #' #' @keywords internal #' @noRd method(varimp_super, class_gam) <- function( model, type = c("partial_effect", "F-test") ) { peff <- predict(model, type = "terms") vi <- apply(peff, 2, var) npeff <- vi / sum(vi) # normalized importance VariableImportance( data.table( variable = names(npeff), Partial_Effect_Variance = unname(npeff) ) ) } # /rtemis::varimp_super.gam # %% se_super.class_gam ---- #' Get Standard Errors from GAM model #' #' @param model mgcv gam model. #' @param newdata tabular data: Data to predict on. #' #' @keywords internal #' @noRd method(se_super, class_gam) <- function(model, newdata) { predict(model, newdata = newdata, se.fit = TRUE)[["se.fit"]] } # /rtemis::se_super.gam ================================================ FILE: R/train_GLM.R ================================================ # train_GLM.R # ::rtemis:: # 2025 EDG rtemis.org # %% train_.GLMHyperparameters ---- #' Train a GLM model #' #' Train a GLM model using `stats::glm`. #' #' @details #' `stats::glm` does not work in the presence of missing values. #' This function uses the formula interface to `glm` to train a GLM model. #' No preprocessing is needed. #' #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param hyperparameters `GLMHyperparameters` object: make using [setup_GLM]. #' @param verbosity Integer: If > 0, print messages. #' #' @return GLM model. #' #' @author EDG #' @keywords internal #' @noRd method(train_, GLMHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) if (is.null(weights)) { weights <- rep(1, NROW(x)) } type <- supervised_type(x) if (type == "Classification") { n_classes <- nlevels(outcome(x)) if (n_classes > 2L) { cli::cli_abort("GLM does not support multiclass classification") } } else { n_classes <- NA_integer_ } # Formula ---- formula <- as.formula( paste( names(x)[ncol(x)], "~ ." ) ) # Train ---- family <- if (type == "Regression") { gaussian() } else if (type == "Classification") { binomial() } model <- glm( formula = formula, family = family, data = x, weights = weights ) check_inherits(model, "glm") list(model = model, preprocessor = NULL) } # /rtemis::train_.GLMHyperparameters # %% predict_super.class_glm ---- #' Predict from GLM model #' #' @param model GLM model. #' @param newdata data.frame or similar: Data to predict on. #' #' @keywords internal #' @noRd method(predict_super, class_glm) <- function( model, newdata, type = NULL, verbosity = 0L ) { predict(model, newdata = newdata, type = "response") } # /rtemis::predict_super.glm # %% varimp_super.class_glm ---- #' Get coefficients from GLM model #' #' @param model GLM model. #' #' @keywords internal #' @noRd method(varimp_super, class_glm) <- function( model, type = c("coefficients", "p-value") ) { type <- match.arg(type) .coef <- if (type == "coefficients") { coef(model) } else if (type == "p-value") { summary(model)[["coefficients"]][, 4] } VariableImportance( data.table( variable = names(.coef), Coefficient = unname(.coef) ) ) } # /rtemis::varimp_super.glm # %% se_super.class_glm ---- #' Get Standard Errors from GLM model #' #' @param model GLM model. #' @param newdata data.frame or similar: Data to predict on. #' #' @author EDG #' @keywords internal #' @noRd method(se_super, class_glm) <- function(model, newdata) { predict(model, newdata = newdata, se.fit = TRUE)[["se.fit"]] } # /rtemis::se_super.glm ================================================ FILE: R/train_GLMNET.R ================================================ # train_GLMNET.R # ::rtemis:: # 2025- EDG rtemis.org # %% train_.GLMNETHyperparameters ---- #' Train a GLMNET model #' #' Train a GLMNET model using `glmnet`. #' #' GLMNET does not work in the presence of missing values. #' #' @param hyperparameters `GLMNETHyperparameters` object: make using [setup_GLMNET]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation tabular data: Validation set (unused). #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, GLMNETHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("glmnet") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Convert "null" lambda to NULL if (hyperparameters[["lambda"]] == "null") { hyperparameters@hyperparameters[["lambda"]] <- NULL } # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) # weights can't be NULL. if (is.null(weights)) { weights <- rep(1, NROW(x)) } type <- supervised_type(x) n_classes <- if (type == "Classification") { nlevels(outcome(x)) } else { NA_integer_ } family <- if (is.null(hyperparameters[["family"]])) { if (type == "Regression") { "gaussian" } else if (type == "Classification") { if (n_classes == 2L) { "binomial" } else { "multinomial" } } } # Train ---- # Create xm so that the correct NCOL is used for penalty_factor, # since factors are converted to dummy variables. xm <- as.matrix( model.matrix(~., exc(x, NCOL(x)))[, -1] ) # Check data-specific hyperparameter values # penalty_factor must be of length = N features. if (is.null(hyperparameters[["penalty_factor"]])) { hyperparameters@hyperparameters[["penalty_factor"]] <- rep(1, NCOL(xm)) if (verbosity > 1L) { msg_info("NCOL(xm): ", NCOL(xm)) msg_info('Updated hyperparameters[["penalty_factor"]] to all 1s.') } } else { if (length(hyperparameters[["penalty_factor"]]) != NCOL(xm)) { cli::cli_abort( "Length of penalty_factor must be equal to the number of predictors." ) } } # if lambda is NULL, use cv.glmnet to find optimal lambda if (is.null(hyperparameters[["lambda"]])) { model <- glmnet::cv.glmnet( x = xm, y = outcome(x), family = family, weights = weights, offset = hyperparameters[["offset"]], alpha = hyperparameters[["alpha"]], nlambda = hyperparameters[["nlambda"]], standardize = hyperparameters[["standardize"]], intercept = hyperparameters[["intercept"]], # can't be NULL penalty.factor = hyperparameters[["penalty_factor"]] ) check_inherits(model, "cv.glmnet") } else { model <- glmnet::glmnet( x = xm, y = outcome(x), family = family, weights = weights, offset = hyperparameters[["offset"]], alpha = hyperparameters[["alpha"]], nlambda = hyperparameters[["nlambda"]], lambda = hyperparameters[["lambda"]], standardize = hyperparameters[["standardize"]], intercept = hyperparameters[["intercept"]], # can't be NULL penalty.factor = hyperparameters[["penalty_factor"]] ) check_inherits(model, "glmnet") } list(model = model, preprocessor = NULL) } # /rtemis::train_.GLMNETHyperparameters #' Predict from GLMNET model #' #' @param model glmnet model. #' @param newdata data.frame or similar: Data to predict on. #' @param type Optional character: "Regression" or "Classification". Auto-detected if NULL. #' #' @author EDG #' @keywords internal #' @noRd method(predict_super, class_glmnet) <- function( model, newdata, type = NULL, verbosity = 0L ) { # Determine type # if model@classnames exists, type is Classification if (is.null(type)) { type <- if (!is.null(model[["classnames"]])) { "Classification" } else { "Regression" } } newdata <- as.matrix( model.matrix(~., newdata)[, -1, drop = FALSE] ) if (type == "Regression") { predict(model, newx = newdata, type = "response")[, 1] } else if (type == "Classification") { predicted_prob <- predict(model, newx = newdata, type = "response") if (NCOL(predicted_prob) == 1) { # In binary classification, glmnet returns matrix with 1 column # with probabilities of second level. predicted_prob <- as.numeric(predicted_prob) } predicted_prob } } # /rtemis::predict_super.class_glmnet #' @keywords internal #' @noRd method(predict_super, class_cv.glmnet) <- function( model, newdata, type = NULL, verbosity = 0L ) { # Determine type # if model@classnames exists, type is Classification if (is.null(type)) { type <- if (!is.null(model[["classnames"]])) { "Classification" } else { "Regression" } } newdata <- as.matrix( model.matrix(~., newdata)[, -1, drop = FALSE] ) if (type == "Regression") { predict(model, newx = newdata, type = "response")[, 1] } else if (type == "Classification") { predicted_prob <- predict(model, newx = newdata, type = "response") if (NCOL(predicted_prob) == 1) { # In binary classification, glmnet returns matrix with 1 column # with probabilities of second level. predicted_prob <- as.numeric(predicted_prob) } predicted_prob } } # /rtemis::predict_super.class_cv.glmnet # %% varimp_super.class_glmnet ---- #' Get coefficients from GLMNET model #' #' @param model glmnet model. #' #' @keywords internal #' @noRd method(varimp_super, class_glmnet) <- function(model) { coefs <- coef(model) # In multiclass, coef(model) returns a list of coefficient matrices, one per class. # Not yet supported as VariableImportance. if (is.list(coefs)) { return(NULL) } if (NCOL(coefs) > 1) { msg("GLMNET with multiple sets of coefficients - returning first column.") } # Exclude intercept coefs <- coefs[, 1][-1] VariableImportance( data.table( variable = names(coefs), Coefficient = unname(coefs) ) ) } # /rtemis::varimp_super.class_glmnet # %% varimp_super.class_cv.glmnet ---- #' @keywords internal #' @noRd method(varimp_super, class_cv.glmnet) <- function(model) { coefs <- coef(model) # In multiclass, coef(model) returns a list of coefficient matrices, one per class. # Not yet supported as VariableImportance. if (is.list(coefs)) { return(NULL) } # Exclude intercept coefs <- coefs[, 1][-1] VariableImportance( data.table( variable = names(coefs), Coefficient = unname(coefs) ) ) } # /rtemis::varimp_super.class_cv.glmnet ================================================ FILE: R/train_Isotonic.R ================================================ # train_Isotonic.R # ::rtemis:: # 2025- EDG rtemis.org # %% train_.IsotonicHyperparameters ---- #' Train an Isotonic model #' #' @details #' This is primarily used for calibration of classification models. #' Binary classification will not work if x and y are not monotonic, i.e. higher values in `x` must #' correspond to `1`, i.e. positive class in y. #' outcome `1`. #' #' @param hyperparameters `IsotonicHyperparameters` object: make using [setup_Isotonic]. #' @param x tabular data: Training set. Only a single predictor is allowed. #' @param weights Not used. #' @param dat_validation Not used. #' @param verbosity Integer: If > 0, print messages. #' #' @return Object of class `stepfun`. #' #' @author EDG #' @keywords internal #' @noRd method(train_, IsotonicHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) if (NCOL(x) > 2) { cli::cli_abort("Isotonic requires a single predictor.") } if (!is.null(weights)) { cli::cli_abort("Isotonic does not support weights.") } type <- supervised_type(x) if (type == "Classification") { n_classes <- nlevels(outcome(x)) if (n_classes > 2L) { cli::cli_abort("Isotonic does not support multiclass classification") } # Assuming binclasspos = 2L y <- as.numeric(x[[2]]) - 1 } else { y <- x[[2]] n_classes <- NA_integer_ } # Model ---- ir <- isoreg(cbind(x[[1]], y)) model <- as.stepfun(ir) check_inherits(model, "stepfun") list(model = model, preprocessor = NULL) } # /rtemis::train_.IsotonicHyperparameters # %% predict_super.class_stepfun ---- #' Predict from Isotonic model #' #' @param model Isotonic model. #' @param newdata data.frame or similar: Data to predict on. #' @param type Not used. #' #' @author EDG #' @keywords internal #' @noRd method(predict_super, class_stepfun) <- function( model, newdata, type = NULL, verbosity = 0L ) { model(newdata[[1]]) } # /rtemis::predict_super.class_stepfun # %% varimp_super.class_stepfun ---- #' Get coefficients from Isotonic model #' #' @param model Isotonic model. #' #' @keywords internal #' @noRd method(varimp_super, class_stepfun) <- function(model) { NULL } # /rtemis::varimp_super.class_stepfun ================================================ FILE: R/train_LightCART.R ================================================ # train_LightCART.R # ::rtemis:: # 2025- EDG rtemis.org # %% train_.LightCARTHyperparameters ---- #' Decision Tree using LightGBM #' #' @param hyperparameters `LightCARTHyperparameters` object: make using [setup_LightCART]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation data.frame or similar: Validation set (not used for LightCART). #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, LightCARTHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("lightgbm") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = TRUE, verbosity = verbosity ) type <- supervised_type(x) if (type == "Classification") { nclasses <- nlevels(outcome(x)) } else { nclasses <- 1L } if (is.null(hyperparameters[["objective"]])) { hyperparameters@hyperparameters[["objective"]] <- if ( type == "Regression" ) { "regression" } else { if (nclasses == 2L) { "binary" } else { "multiclass" } } } ## Preprocess & create lgb.Dataset ---- lgb_data <- prepare_lgb_data( x = x, type = type, weights = weights, verbosity = verbosity ) x <- lgb_data[["train_data"]] prp <- lgb_data[["preprocessor"]] # Train ---- params <- hyperparameters@hyperparameters params[["ifw"]] <- NULL # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification if (nclasses > 2L) { params[["num_class"]] <- nclasses } # Set n threads params[["num_threads"]] <- 1L model <- lightgbm::lgb.train( params = params, data = x, nrounds = 1L, valids = list(training = x), early_stopping_rounds = NULL, verbose = verbosity - 2L ) check_inherits(model, "lgb.Booster") list(model = model, preprocessor = prp) } # /rtemis::train_.LightCARTHyperparameters ================================================ FILE: R/train_LightGBM.R ================================================ # train_LightGBM.R # ::rtemis:: # 2025 EDG rtemis.org # LightGBM parameters # https://lightgbm.readthedocs.io/en/latest/Parameters.html # %% train_.LightGBMHyperparameters ---- #' Gradient Boosting with LightGBM #' #' @param hyperparameters `LightGBMHyperparameters` object: make using [setup_LightGBM]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Validation set for early stopping. #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, LightGBMHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("lightgbm") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Convert "null" nrounds to max_nrounds if (hyperparameters[["nrounds"]] == "null") { hyperparameters@hyperparameters[["nrounds"]] <- hyperparameters[[ "max_nrounds" ]] } # Data ---- check_supervised( x = x, dat_validation = dat_validation, allow_missing = TRUE, verbosity = verbosity ) type <- supervised_type(x) ## Objective ---- if (type == "Classification") { nclasses <- nlevels(outcome(x)) } else { nclasses <- 1L } if (is.null(hyperparameters[["objective"]])) { hyperparameters@hyperparameters[["objective"]] <- if ( type == "Regression" ) { "regression" } else { if (nclasses == 2L) { "binary" } else { "multiclass" } } } ## Preprocess & create lgb.Datasets ---- lgb_data <- prepare_lgb_data( x = x, dat_validation = dat_validation, type = type, weights = weights, verbosity = verbosity ) x <- lgb_data[["train_data"]] dat_validation <- lgb_data[["valid_data"]] prp <- lgb_data[["preprocessor"]] # Train ---- params <- hyperparameters@hyperparameters params[["nrounds"]] <- params[["max_nrounds"]] <- params[[ "early_stopping_rounds" ]] <- params[["force_nrounds"]] <- params[["ifw"]] <- NULL # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification if (nclasses > 2L) { params[["num_class"]] <- nclasses } # Set n threads params[["num_threads"]] <- prop(hyperparameters, "n_workers") model <- lightgbm::lgb.train( params = params, data = x, nrounds = hyperparameters[["nrounds"]], valids = if (!is.null(dat_validation)) { list(training = x, validation = dat_validation) } else { list(training = x) }, early_stopping_rounds = hyperparameters[["early_stopping_rounds"]], verbose = verbosity - 1L ) check_inherits(model, "lgb.Booster") list(model = model, preprocessor = prp) } # /rtemis::train_.LightGBMHyperparameters # %% predict_super.class_lgb.Booster ---- #' Predict from LightGBM model #' #' @param model lgb.Booster object. #' @param newdata tabular data: Data to predict on. Will have been preprocessed by #' `predict.Supervised` before calling this method if algorithm-specific preprocessing was performed during training. #' @param type Character: Type of supervised learning. #' #' @keywords internal #' @noRd method(predict_super, class_lgb.Booster) <- function( model, newdata, type = NULL, verbosity = 0L ) { check_inherits(model, "lgb.Booster") check_inherits(newdata, "data.frame") # Algorithm-specific preprocessing (factor2integer) is applied by # predict.Supervised before calling this method. See R/train.R and R/07_Supervised.R # Predict ---- predict(model, newdata = as.matrix(newdata)) } # /rtemis::predict_super.lgb.Booster # %% varimp_super.class_lgb.Booster ---- #' Get variable importance from LightGBM model #' #' @param model lgb.Booster object. #' #' @keywords internal #' @noRd method(varimp_super, class_lgb.Booster) <- function(model) { check_inherits(model, "lgb.Booster") vi <- lightgbm::lgb.importance(model, percentage = TRUE) # -> data.table names(vi)[1] <- "variable" VariableImportance(vi) } # /rtemis::varimp_super.lgb.Booster ================================================ FILE: R/train_LightRF.R ================================================ # train_LightRF.R # ::rtemis:: # 2025 EDG rtemis.org # References # LightGBM parameters: https://lightgbm.readthedocs.io/en/latest/Parameters.html # %% train_.LightRFHyperparameters ---- #' Random Forest using LightGBM #' #' @param hyperparameters `LightRFHyperparameters` object: make using [setup_LightRF]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Validation set for early stopping. #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, LightRFHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("lightgbm") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, dat_validation = dat_validation, allow_missing = TRUE, verbosity = verbosity ) type <- supervised_type(x) if (type == "Classification") { nclasses <- nlevels(outcome(x)) } else { nclasses <- 1L } if (is.null(hyperparameters[["objective"]])) { hyperparameters@hyperparameters[["objective"]] <- if ( type == "Regression" ) { "regression" } else { if (nclasses == 2L) { "binary" } else { "multiclass" } } } ## Preprocess & create lgb.Datasets ---- lgb_data <- prepare_lgb_data( x = x, dat_validation = dat_validation, type = type, weights = weights, verbosity = verbosity ) x <- lgb_data[["train_data"]] dat_validation <- lgb_data[["valid_data"]] prp <- lgb_data[["preprocessor"]] # Train ---- params <- hyperparameters@hyperparameters # Remove params that are not used by LightGBM params[["ifw"]] <- NULL params[["nrounds"]] <- params[["early_stopping_rounds"]] <- NULL # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification if (nclasses > 2L) { params[["num_class"]] <- nclasses } # Set n threads params[["num_threads"]] <- prop(hyperparameters, "n_workers") model <- lightgbm::lgb.train( params = params, data = x, nrounds = hyperparameters[["nrounds"]], valids = if (!is.null(dat_validation)) { list(training = x, validation = dat_validation) } else { list(training = x) }, early_stopping_rounds = hyperparameters[["early_stopping_rounds"]], verbose = verbosity - 2L ) check_inherits(model, "lgb.Booster") list(model = model, preprocessor = prp) } # /rtemis::train_.LightRFHyperparameters ================================================ FILE: R/train_LightRuleFit.R ================================================ # train_LightRuleFit.R # ::rtemis:: # 2025 EDG rtemis.org # %% train_.LightRuleFitHyperparameters ---- #' Train a LightRuleFit model #' #' Train a LightRuleFit model using LightGBM and GLMNET. #' #' @param hyperparameters `LightRuleFitHyperparameters` object: make using [setup_LightRuleFit]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation tabular data: Validation set. #' @param verbosity Integer: If > 0, print messages. #' #' @author EDG #' @keywords internal #' @noRd method(train_, LightRuleFitHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("lightgbm", "glmnet", "matrixStats", "gsubfn") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, dat_validation = dat_validation, allow_missing = TRUE, verbosity = verbosity ) type <- supervised_type(x) nclasses <- if (type == "Classification") nlevels(x[[ncol(x)]]) else 1L # IFW for LightGBM ---- # See setup_LightRuleFit: You can choose to use IFW for both steps with `ifw = TRUE` OR control each steps individually using `ifw_lightgbm` and `ifw_glmnet`. lightgbm_weights <- if (hyperparameters[["ifw_lightgbm"]]) { ifw(x[[ncol(x)]], verbosity = verbosity) } else { weights } # Train Gradient Boosting using LightGBM ---- # LightRuleFit_tunable includes the names of all LightGBM hyperparameters used by LightRuleFit. lgbm_parameters <- update( setup_LightGBM(), get_hyperparams(hyperparameters, LightRuleFit_lightgbm_params) ) lgbm_parameters@hyperparameters[["ifw"]] <- hyperparameters[["ifw_lightgbm"]] mod_lgbm <- train( x = x, dat_validation = dat_validation, weights = lightgbm_weights, hyperparameters = lgbm_parameters, # tuner_config = tuner_config, # ? add tuner_config to LightRuleFitHyperparameters outer_resampling_config = NULL, execution_config = execution_config, verbosity = verbosity ) # Extract Rules from Boosted Trees ---- lgbm_rules <- extract_rules( mod_lgbm@model, n_iter = NULL, xnames = names(x), factor_levels = get_factor_levels(x) ) # Match cases x rules ---- cases_by_rules <- match_cases_by_rules(x, lgbm_rules, verbosity = verbosity) # IFW for LASSO ---- glmnet_weights <- if (hyperparameters[["ifw_glmnet"]]) { ifw(x[[ncol(x)]], verbosity = verbosity) } else { weights } # LASSO: Select Rules ---- lasso_hyperparameters <- setup_GLMNET( alpha = hyperparameters[["alpha"]], lambda = hyperparameters[["lambda"]] ) dat_rules <- data.frame(cases_by_rules, y = x[[ncol(x)]]) colnames(dat_rules)[ncol(dat_rules)] <- colnames(x)[ncol(x)] mod_glmnet <- train( dat_rules, hyperparameters = lasso_hyperparameters, weights = glmnet_weights, execution_config = execution_config, verbosity = verbosity ) # Rule coefficients ---- rules_coefs <- data.matrix(coef(mod_glmnet@model)) # Need special handling for multiclass support starting here intercept_coef <- rules_coefs[1, , drop = FALSE] colnames(intercept_coef) <- "Coefficient" rules_coefs <- data.frame(Rule = lgbm_rules, Coefficient = rules_coefs[-1, 1]) nonzero_index <- which(abs(rules_coefs[["Coefficient"]]) > 0) rules_selected <- lgbm_rules[nonzero_index] cases_by_rules_selected <- cases_by_rules[, nonzero_index] Ncases_by_rules <- matrixStats::colSums2(cases_by_rules_selected) # Empirical risk ---- if (type == "Classification" && nclasses == 2) { x <- as.data.table(x) empirical_risk <- vector("numeric", length(rules_selected)) for (i in seq_along(rules_selected)) { match <- x[eval(parse(text = rules_selected[i])), ] freq <- table(match[[ncol(match)]]) empirical_risk[i] <- freq[mod_glmnet@binclasspos] / sum(freq) } } else { empirical_risk <- NULL } # Format Rules ---- # => Check format_LightRuleFit_rules' use of gsubfn::gsubfn rules_selected_formatted <- format_LightRuleFit_rules( rules_selected, decimal_places = 2 ) # appease R CMD check Coefficient <- NULL rules_selected_formatted_coefs <- data.table( Rule_ID = seq(rules_selected_formatted), Rule = rules_selected_formatted, N_Cases = Ncases_by_rules, Coefficient = rules_coefs[["Coefficient"]][nonzero_index] ) if (type == "Classification" && nclasses == 2) { # appease R CMD check Empirical_Risk <- NULL rules_selected_formatted_coefs[, Empirical_Risk := empirical_risk] } setorder(rules_selected_formatted_coefs, -Coefficient) # LightRuleFit ---- model <- LightRuleFit( model_lightgbm = mod_lgbm, model_glmnet = mod_glmnet, rules = lgbm_rules, rules_coefs = rules_coefs, rules_index = nonzero_index, rules_selected = rules_selected, rules_selected_formatted = rules_selected_formatted, rules_selected_formatted_coefs = rules_selected_formatted_coefs, y_levels = levels(x[[ncol(x)]]), xnames = names(x)[-ncol(x)], complexity_metrics = data.frame( n_rules_total = length(lgbm_rules), n_nonzero_rules = length(nonzero_index) ) ) list(model = model, preprocessor = NULL) } # /rtemis::train_.LightRuleFitHyperparameters # %% predict_super.LightRuleFitHyperparameters ---- #' Predict from LightRuleFit LightGBM model #' #' @param model LightRuleFit object trained using `train_LightRuleFit`. #' @param newdata data.frame or similar: Data to predict on. #' #' @keywords internal #' @noRd method(predict_super, LightRuleFit) <- function( model, newdata, type = NULL, verbosity = 0L ) { check_inherits(newdata, "data.frame") rules <- model@rules cases_by_rules <- match_cases_by_rules(newdata, rules, verbosity = verbosity) datm <- data.matrix(cases_by_rules) if (model@model_lightgbm@type == "Classification") { predicted <- predict( model@model_glmnet@model, newx = datm, type = "response" ) if (length(model@y_levels) == 2) { predicted[, 1] } else { predicted } } else { as.numeric(predict(model@model_glmnet@model, newx = datm)) } } # /rtemis::predict_super.LightRuleFit # %% varimp_super.LightRuleFit ---- #' Get variable importance from LightRuleFit model #' #' @param model LightRuleFit object trained using `train_LightRuleFit`. #' #' @keywords internal #' @noRd method(varimp_super, LightRuleFit) <- function(model) { .coef <- coef(model@model_glmnet@model)[-1, , drop = FALSE] VariableImportance( data.table( variable = rownames(.coef), Coefficient = unname(.coef[, 1]) ) ) } # /rtemis::varimp_super.LightRuleFit ================================================ FILE: R/train_Ranger.R ================================================ # train_Ranger.R # ::rtemis:: # 2025 EDG rtemis.org # References # https://imbs-hl.github.io/ranger/reference/ranger.html # %% train_.RangerHyperparameters ---- #' Random Forest using Ranger #' #' @param hyperparameters `RangerHyperparameters`: Hyperparameters for Ranger. #' @param x tabular data: Training data. #' @param weights Numeric vector: Case weights. #' @param dat_validation tabular data: Validation data (currently unused). #' @param verbosity Integer: Verbosity level. #' #' @return `ranger` model object. #' #' @author EDG #' @keywords internal #' @noRd method(train_, RangerHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("ranger") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # mtry cannot be larger than number of features if (any(hyperparameters@hyperparameters[["mtry"]] > NCOL(features(x)))) { cli::cli_abort( "mtry cannot be greater than number of features: {ncol(features(x))}." ) } # Data ---- check_supervised( x = x, allow_missing = TRUE, verbosity = verbosity ) type <- supervised_type(x) # Train ---- model <- ranger::ranger( formula = NULL, x = features(x), y = outcome(x), num.trees = hyperparameters@hyperparameters[["num_trees"]], mtry = hyperparameters@hyperparameters[["mtry"]], importance = hyperparameters@hyperparameters[["importance"]], write.forest = hyperparameters@hyperparameters[["write_forest"]], probability = type == "Classification", min.node.size = hyperparameters@hyperparameters[["min_node_size"]], min.bucket = hyperparameters@hyperparameters[["min_bucket"]], max.depth = hyperparameters@hyperparameters[["max_depth"]], replace = hyperparameters@hyperparameters[["replace"]], sample.fraction = hyperparameters@hyperparameters[["sample_fraction"]], case.weights = weights, splitrule = hyperparameters@hyperparameters[["splitrule"]], num.random.splits = hyperparameters@hyperparameters[["num_random_splits"]], alpha = hyperparameters@hyperparameters[["alpha"]], minprop = hyperparameters@hyperparameters[["minprop"]], poisson.tau = hyperparameters@hyperparameters[["poisson_tau"]], split.select.weights = hyperparameters@hyperparameters[[ "split_select_weights" ]], always.split.variables = hyperparameters@hyperparameters[[ "always_split_variables" ]], respect.unordered.factors = hyperparameters@hyperparameters[[ "respect_unordered_factors" ]], scale.permutation.importance = hyperparameters@hyperparameters[[ "scale_permutation_importance" ]], local.importance = hyperparameters@hyperparameters[["local_importance"]], regularization.factor = hyperparameters@hyperparameters[[ "regularization_factor" ]], regularization.usedepth = hyperparameters@hyperparameters[[ "regularization_usedepth" ]], keep.inbag = hyperparameters@hyperparameters[["keep_inbag"]], inbag = hyperparameters@hyperparameters[["inbag"]], holdout = hyperparameters@hyperparameters[["holdout"]], quantreg = hyperparameters@hyperparameters[["quantreg"]], time.interest = hyperparameters@hyperparameters[["time_interest"]], oob.error = hyperparameters@hyperparameters[["oob_error"]], num.threads = prop(hyperparameters, "n_workers"), save.memory = hyperparameters@hyperparameters[["save_memory"]], verbose = verbosity > 0L, node.stats = hyperparameters@hyperparameters[["node_stats"]], seed = hyperparameters@hyperparameters[["seed"]], na.action = hyperparameters@hyperparameters[["na_action"]] ) check_inherits(model, "ranger") list(model = model, preprocessor = NULL) } # /rtemis::train_.RangerHyperparameters #' Predict from Ranger model #' #' @param model `ranger` model object. #' @param newdata data.frame or similar: Data to predict on. #' @param type Character: Prediction type. #' @param verbosity Integer: Verbosity level. #' @param ranger_type Character: Ranger prediction type. #' @param ... Additional arguments passed to ranger predict. #' #' @keywords internal #' @noRd method(predict_super, class_ranger) <- function( model, newdata, type = NULL, verbosity = 0L ) { check_inherits(model, "ranger") check_inherits(newdata, "data.frame") # Predict ---- predicted <- predict( model, data = newdata, type = "response", verbose = verbosity > 0L )[["predictions"]] if (type == "Classification" && NCOL(predicted) == 2L) { # In binary classification, ranger returns matrix with 2 columns # with probabilities for each class predicted <- predicted[, 2L] } predicted } # /rtemis::predict_super.class_ranger # %% varimp_super.class_ranger ---- #' Get variable importance from Ranger model #' #' @param model `ranger` model object. #' #' @keywords internal #' @noRd method(varimp_super, class_ranger) <- function(model) { check_inherits(model, "ranger") vi <- ranger::importance(model) VariableImportance( data.table( variable = names(vi), importance = unname(vi) ) ) } # /rtemis::varimp_super.class_ranger # %% validate_hyperparameters.RangerHyperparameters ---- #' Validate Ranger Hyperparameters #' #' Validate Ranger Hyperparameters given training data. #' #' @param x tabular data: Training data. #' @param hyperparameters `RangerHyperparameters`: Hyperparameters to check. #' #' @return NULL. Will throw error if hyperparameters are invalid. #' #' @keywords internal #' @noRd method(validate_hyperparameters, RangerHyperparameters) <- function( x, hyperparameters ) { check_is_S7(x, class_data.frame) check_is_S7(hyperparameters, RangerHyperparameters) # Check mtry if (any(hyperparameters@hyperparameters[["mtry"]] > NCOL(features(x)))) { cli::cli_abort( "mtry cannot be greater than number of features: {ncol(features(x))}." ) } hyperparameters } # /rtemis::validate_hyperparameters.RangerHyperparameters ================================================ FILE: R/train_SVM.R ================================================ # train_SVM.R # ::rtemis:: # 2025- EDG rtemis.org # %% train_.LinearSVMHyperparameters ---- #' Train a Linear SVM model #' #' Train a Linear SVM model using `e1071::svm`. #' #' SVM does not work in the presence of missing values. #' #' @param hyperparameters `LinearSVMHyperparameters` object: make using [setup_LinearSVM]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Not used for Linear SVM. #' @param verbosity Integer: If > 0, print messages. #' #' @return Object of class `svm`. #' #' @author EDG #' @keywords internal #' @noRd method(train_, LinearSVMHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("e1071") # Checks ---- if (!is.null(weights)) { cli::cli_abort( "Case weights are not supported by e1071::svm. You can enable `ifw` in the hyperparameters to use inverse frequency weighting instead." ) } # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) type <- supervised_type(x) n_classes <- if (type == "Classification") { nlevels(outcome(x)) } else { NA } # Preprocess ---- # One-hot encode y <- outcome(x) x <- features(x) factor_index <- names(x)[which(sapply(x, is.factor))] if (length(factor_index) > 0L) { prp <- preprocess( x, config = setup_Preprocessor(one_hot = TRUE), verbosity = verbosity ) x <- preprocessed(prp) } else { prp <- NULL } # Can use class_weights or set class.weights = "inverse" in svm() # if (is.null(weights)) { # weights <- rep(1, NROW(x)) # } # Train ---- class_weights <- if ( type == "Classification" && n_classes == 2 && hyperparameters[["ifw"]] ) { "inverse" } else { NULL } # gamma can't be NULL even if not used gamma <- hyperparameters[["gamma"]] if (is.null(gamma)) { gamma <- 1 } model <- e1071::svm( x = x, y = y, # factor or numeric kernel = hyperparameters[["kernel"]], cost = hyperparameters[["cost"]], gamma = gamma, class.weights = class_weights, probability = TRUE ) check_inherits(model, "svm") list(model = model, preprocessor = prp) } # /rtemis::train_.LinearSVMHyperparameters # %% train_.RadialSVMHyperparameters ---- #' Train a Radial SVM model #' #' Train a Radial SVM model using `e1071::svm`. #' #' SVM does not work in the presence of missing values. #' #' @param hyperparameters `RadialSVMHyperparameters` object: make using [setup_RadialSVM]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation Optional tabular data: Not used for Radial SVM. #' @param verbosity Integer: If > 0, print messages. #' #' @return Object of class `svm`. #' #' @author EDG #' @keywords internal #' @noRd method(train_, RadialSVMHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("e1071") # Checks ---- if (!is.null(weights)) { cli::cli_abort( "Case weights are not supported by e1071::svm. You can enable `ifw` in the hyperparameters to use inverse frequency weighting instead." ) } # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) type <- supervised_type(x) n_classes <- if (type == "Classification") { nlevels(outcome(x)) } else { NA } # Preprocess ---- # One-hot encode y <- outcome(x) x <- features(x) factor_index <- names(x)[which(sapply(x, is.factor))] if (length(factor_index) > 0L) { prp <- preprocess( x, config = setup_Preprocessor(one_hot = TRUE), verbosity = verbosity ) x <- preprocessed(prp) } else { prp <- NULL } # Can use class_weights or set class.weights = "inverse" in svm() # if (is.null(weights)) { # weights <- rep(1, NROW(x)) # } # Train ---- class_weights <- if ( type == "Classification" && n_classes == 2 && hyperparameters[["ifw"]] ) { "inverse" } else { NULL } # gamma can't be NULL even if not used gamma <- hyperparameters[["gamma"]] if (is.null(gamma)) { gamma <- 1 } model <- e1071::svm( x = x, y = y, # factor or numeric kernel = hyperparameters[["kernel"]], cost = hyperparameters[["cost"]], gamma = gamma, class.weights = class_weights, probability = TRUE ) check_inherits(model, "svm") list(model = model, preprocessor = prp) } # /rtemis::train_.RadialSVMHyperparameters # %% predict_super.svm ---- #' Predict from SVM model #' #' @param model SVM model. #' @param newdata data.frame or similar: Data to predict on. #' @param type Character: Type of supervised learning ("Classification" or "Regression"). #' #' @keywords internal #' @noRd method(predict_super, class_svm) <- function( model, newdata, type = NULL, verbosity = 0L ) { if (type == "Classification") { predicted_prob <- attr( predict(model, newdata = newdata, probability = TRUE), "probabilities" ) if (length(model$levels) == 2) { predicted_prob[, 2] } else { predicted_prob } } else { predict(model, newdata = newdata) } } # /rtemis::predict_super.svm # %% varimp_super.class_svm ---- #' Get coefficients from SVM model #' #' @param model SVM model. #' #' @keywords internal #' @noRd method(varimp_super, class_svm) <- function(model) { # Only for linear kernel with binary classification if (model[["kernel"]] == 0L && model[["nclasses"]] == 2) { .coefs <- coef(model) VariableImportance( data.table( variable = names(.coefs), Coefficient = unname(.coefs) ) ) } else { NULL } } # /rtemis::varimp_super.svm ================================================ FILE: R/train_TabNet.R ================================================ # train_TabNet.R # ::rtemis:: # 2025 EDG rtemis.org # %% train_.TabNetHyperparameters ---- #' Train a TabNet model #' #' Train a TabNet model using `TabNet`. #' #' TabNet does not work in the presence of missing values. #' #' @param hyperparameters `TabNetHyperparameters` object: make using [setup_TabNet]. #' @param x tabular data: Training set. #' @param weights Numeric vector: Case weights. #' @param dat_validation tabular data: Validation set for early stopping. #' @param verbosity Integer: Verbosity level. #' #' @return Object of class `TabNet`. #' #' @author EDG #' @keywords internal #' @noRd method(train_, TabNetHyperparameters) <- function( hyperparameters, x, weights = NULL, dat_validation = NULL, execution_config = setup_ExecutionConfig(), verbosity = 1L ) { # Dependencies ---- check_dependencies("torch", "tabnet") # Hyperparameters ---- # Hyperparameters must be either untunable or frozen by `train`. if (needs_tuning(hyperparameters)) { cli::cli_abort("Hyperparameters must be fixed - use train() instead.") } # Data ---- check_supervised( x = x, allow_missing = FALSE, verbosity = verbosity ) type <- supervised_type(x) # Scale data ---- y <- outcome(x) prp <- preprocess( features(x), config = setup_Preprocessor(scale = TRUE, center = TRUE) ) x <- prp@preprocessed # Train ---- # The predictor data should be standardized (e.g. centered or scaled). The model treats # categorical predictors internally thus, you don't need to make any treatment. config <- get_tabnet_config(hyperparameters) config[["verbose"]] <- verbosity > 0L model <- tabnet::tabnet_fit( x = x, y = y, config = config, weights = weights ) check_inherits(model, "tabnet_fit") list(model = model, preprocessor = prp) } # /rtemis::train_.TabNetHyperparameters # %% predict_super.class_tabnet_fit ---- #' Predict from TabNet model #' #' @param model TabNet model. #' @param newdata data.frame or similar: Data to predict on. #' @param type Character: "Regression" or "Classification". #' #' @keywords internal #' @noRd method(predict_super, class_tabnet_fit) <- function( model, newdata, type = NULL, verbosity = 0L ) { if (type == "Regression") { predict(model, new_data = newdata)[[1]] } else if (type == "Classification") { predicted <- predict(model, new_data = newdata, type = "prob") if (NCOL(predicted) == 2) { predicted[[2]] } else { predicted } } } # /rtemis::predict_super.class_tabnet_fit # %% varimp_super.class_tabnet_fit ---- #' Get variable importance from TabNet model #' #' @param model TabNet model. #' #' @keywords internal #' @noRd method(varimp_super, class_tabnet_fit) <- function(model) { NULL } # /rtemis::varimp_super.class_tabnet_fit ================================================ FILE: R/tune.R ================================================ # tune.R # ::rtemis:: # 2025 EDG rtemis.org # %% get_tuner_fn ---- #' Get Tuner Function #' #' @param type Character: Type of tuner. #' #' @author EDG #' #' @keywords internal #' @noRd get_tuner_fn <- function(type = "GridSearch") { type <- match_arg(type, c("GridSearch")) switch(type, "GridSearch" = "tune_GridSearch") } # /rtemis::get_tuner_fn # %% tune ---- #' Tune Supervised Learning Model #' #' @param x tabular data: Training set data. #' @param hyperparameters `Hyperparameters` object: make using each learner's `setup_*` function. #' @param tuner_config `TunerConfig` object: created with [setup_GridSearch]. #' @param preprocessor_config Optional `PreprocessorConfig` object: created with #' [setup_Preprocessor]. #' @param weights Numeric vector: Optional case weights. #' @param verbosity Integer: Verbosity level. #' #' @author EDG #' @keywords internal #' @noRd tune <- function( x, hyperparameters, tuner_config, preprocessor_config = NULL, weights = NULL, verbosity = 1L, backend = "none", future_plan = "multicore", n_workers = 1L ) { check_is_S7(hyperparameters, Hyperparameters) check_is_S7(tuner_config, TunerConfig) stopifnot(needs_tuning(hyperparameters)) if (tuner_config@type == "GridSearch") { tune_GridSearch( x = x, hyperparameters = hyperparameters, tuner_config = tuner_config, preprocessor_config = preprocessor_config, weights = weights, verbosity = verbosity, backend = backend, future_plan = future_plan, n_workers = n_workers ) } else { cli::cli_abort("Unsupported tuner type: {tuner_config@type}") } } # /rtemis::tune ================================================ FILE: R/tune_GridSearch.R ================================================ # tune_GridSearch.R # ::rtemis:: # 2025 EDG rtemis.org # %% tune_GridSearch ---- #' \pkg{rtemis} internal: Grid Search for Hyperparameter Tuning of \pkg{rtemis} Learners #' #' Train models using a combination of parameter values for model selection #' #' @details #' Note that weights, if defined (and not NULL), should be passed directly to `grid_search` #' as they need to be resampled along `x` and `y`, and should not be passed along with #' `grid_params`. `ifw` and `ifw_type` should be passed as part of `grid_params` #' and will be passed on to the learner. #' Includes a algorithm-specific extraction of config that are determined internally, #' such as `lambda` for `GLMNET`, `nrounds` for `LightGBM`, etc. #' #' The current implementation allows running sequentially either directly using lapply + cli #' progress, or using a sequential future plan. The former may give better debugging information. #' The latter may be helpful to test that the future parallelization setup works correctly. #' #' @param x tabular data: Training set. #' @param hyperparameters `Hyperparameters` object created with a learner's `setup_*` function. #' @param tuner_config `TunerConfig` object created with [setup_GridSearch]. #' @param preprocessor_config Optional `PreprocessorConfig` object: Applied within each tuning #' fold so hyperparameters are evaluated on preprocessed data. #' @param weights Vector: Class weights. #' @param save_mods Logical: Save models in tuning results. #' @param n_workers Integer: Number of workers to use for parallel processing. #' @param backend Character: Type of parallelization to use. Options are "none", "future", #' or "mirai". #' @param future_plan Character: Future plan to use if `backend` is "future". #' @param verbosity Integer: Verbosity level. #' #' @return `GridSearch` object. #' #' @author EDG #' #' @keywords internal #' @noRd tune_GridSearch <- function( x, hyperparameters, tuner_config, preprocessor_config = NULL, weights = NULL, save_mods = FALSE, n_workers = 1L, backend = NULL, future_plan = NULL, verbosity = 1L ) { check_is_S7(hyperparameters, Hyperparameters) check_is_S7(tuner_config, TunerConfig) stopifnot(needs_tuning(hyperparameters)) # Dependencies ---- if (backend == "future") { check_dependencies("futurize", "future.apply") if (!is.null(future_plan) && future_plan == "sequential") { if (n_workers > 1L) { cli::cli_abort( "Requested 'sequential' future plan, which supports {.val 1L} worker, but {.val {n_workers}} workers were requested." ) } } } else if (backend == "mirai") { check_dependencies("mirai") } # Intro ---- start_time <- intro( newline_pre = TRUE, caller = "tune_GridSearch", verbosity = verbosity - 1L ) # Arguments ---- algorithm <- hyperparameters@algorithm # Parallel Processing Strategy ---- # If backend is NULL, default to "none" if (is.null(backend)) { backend <- "none" } # If backend is "future" or "mirai" with n_workers = 1, we execute # sequentially using the respective backend just to test that the # parallelization setup works. # If the user wants standard sequential execution, they should use/leave # backend = "none" (default). if (backend != "none" && n_workers == 1L) { if (verbosity > 0L) { msg0( "Using ", backend, " with 1 worker" ) } } # Make Grid ---- grid_params <- get_hyperparams_need_tuning(hyperparameters) n_resamples <- tuner_config[["resampler_config"]][["n"]] search_type <- tuner_config[["search_type"]] # expand_grid converts NULL to "null" for expansion to work. param_grid <- expand_grid(grid_params, stringsAsFactors = FALSE) param_grid <- cbind(param_combo_id = seq_len(NROW(param_grid)), param_grid) n_param_combinations <- NROW(param_grid) res_param_grid <- expand_grid( c(list(resample_id = seq_len(n_resamples)), grid_params), stringsAsFactors = FALSE ) n_res_x_comb <- NROW(res_param_grid) if (search_type == "randomized") { index_per_resample <- sample( n_param_combinations, round(tuner_config[["randomize_p"]] * n_param_combinations) ) res_param_grid <- res_param_grid[rep(index_per_resample, n_resamples), ] } # Intro pt. 2 ---- if (verbosity > 0L) { msg0( fmt("<> ", col = col_tuner, bold = TRUE), "Tuning ", algorithm, " by ", search_type, " grid search with ", desc(tuner_config@config[["resampler_config"]]), "..." ) msg0( fmt(n_param_combinations, col = col_tuner, bold = TRUE), ngettext( n_param_combinations, " parameter combination x ", " parameter combinations x " ), fmt(n_resamples, col = col_tuner, bold = TRUE), " resamples: ", fmt(n_res_x_comb, col = col_tuner, bold = TRUE), " models total", " (", Sys.getenv("R_PLATFORM"), ")." ) } # Resamples ---- res <- resample( x = x, config = tuner_config[["resampler_config"]], verbosity = verbosity ) # learner1 ---- if (backend == "future") { ptn <- progressr::progressor(steps = NROW(res_param_grid)) } learner1 <- function( index, x, res, res_param_grid, hyperparameters, preprocessor_config, weights, verbosity, save_mods, n_res_x_comb ) { if (verbosity > 1L) { msg_info( "Running grid line #", fmt(index, col = col_tuner, bold = TRUE), "/", NROW(res_param_grid), "...", caller = "tune_GridSearch", sep = "" ) } res1 <- res[[res_param_grid[index, "resample_id"]]] dat_train1 <- x[res1, ] weights1 <- weights[res1] dat_valid1 <- x[-res1, ] hyperparams1 <- hyperparameters hyperparams1 <- update( hyperparams1, as.list(res_param_grid[index, 2:NCOL(res_param_grid), drop = FALSE]), tuned = -9L # Hyperparameters are being tuned ) mod1 <- do_call( "train", args = list( x = dat_train1, dat_validation = dat_valid1, algorithm = hyperparams1@algorithm, preprocessor_config = preprocessor_config, hyperparameters = hyperparams1, weights = weights1, verbosity = verbosity - 1L ) ) out1 <- list( id = index, resample_id = res_param_grid[index, "resample_id"], metrics_training = mod1@metrics_training, metrics_validation = mod1@metrics_validation, type = mod1@type, hyperparameters = hyperparams1 ) # Algorithm-specific params ---- # => add to hyperparameters if (algorithm == "GLMNET") { out1[["hyperparameters"]]@hyperparameters[["lambda.min"]] <- mod1@model[[ "lambda.min" ]] out1[["hyperparameters"]]@hyperparameters[["lambda.1se"]] <- mod1@model[[ "lambda.1se" ]] } if (algorithm == "LightGBM") { # Check best_iter is meaningful, otherwise issue message and set to 100L best_iter <- mod1@model[["best_iter"]] if (is.null(best_iter) || best_iter == -1 || best_iter == 0) { msg_info( paste( "best_iter returned from lightgbm:", best_iter, "- setting to 100L" ) ) best_iter <- 100L } out1[["hyperparameters"]]@hyperparameters[["best_iter"]] <- best_iter } # if (algorithm %in% c("LINAD", "LINOA")) { # out1$est.n.leaves <- mod1$mod$n.leaves # } # if (algorithm == "LIHADBoost") { # out1$sel.n.steps <- mod1$mod$selected.n.steps # } if (save_mods) { out1[["mod1"]] <- mod1 } if (backend == "future") { ptn(sprintf("Tuning resample %i/%i", index, n_res_x_comb)) } out1 } # /learner1 # Train Grid ---- if (backend == "none") { if (verbosity > 0L) { msg("Tuning in sequence") } # Sequential execution with cli progress. grid_run <- lapply( cli::cli_progress_along( seq_len(n_res_x_comb), name = paste0("Tuning... (", n_res_x_comb, " combinations)"), type = "tasks" ), FUN = learner1, x = x, res = res, hyperparameters = hyperparameters, res_param_grid = res_param_grid, preprocessor_config = preprocessor_config, weights = weights, verbosity = verbosity, save_mods = save_mods, n_res_x_comb = n_res_x_comb ) } else if (backend == "future") { # Future parallelization future_plan <- set_preferred_plan( requested_plan = future_plan, n_workers = n_workers, envir = parent.frame(), verbosity = verbosity ) if (verbosity > 0L) { msg0( "Tuning using future (", bold(future_plan), "); N workers: ", bold(n_workers) ) } if (verbosity > 1L) { # verify plan set by set_preferred_plan with envir msg_info("Current future plan:") print(future::plan()) } grid_run <- lapply( X = seq_len(n_res_x_comb), FUN = learner1, x = x, res = res, hyperparameters = hyperparameters, res_param_grid = res_param_grid, preprocessor_config = preprocessor_config, weights = weights, verbosity = verbosity, save_mods = save_mods, n_res_x_comb = n_res_x_comb ) |> futurize::futurize(seed = TRUE, globals = FALSE) } else if (backend == "mirai") { if (verbosity > 0L) { msg("Tuning using mirai; N workers:", bold(n_workers)) } mirai::daemons(n_workers, dispatcher = TRUE) on.exit(mirai::daemons(0L)) grid_run <- mirai::mirai_map( .x = seq_len(n_res_x_comb), .f = learner1, .args = list( x = x, res = res, hyperparameters = hyperparameters, res_param_grid = res_param_grid, preprocessor_config = preprocessor_config, weights = weights, verbosity = verbosity, save_mods = save_mods, n_res_x_comb = n_res_x_comb ) ) } # Metric ---- type <- supervised_type(x) metric <- tuner_config@config[["metric"]] maximize <- tuner_config@config[["maximize"]] if (is.null(metric)) { if (type == "Classification") { metric <- "Balanced_Accuracy" } else if (type == "Regression") { metric <- "MSE" } else { metric <- "Concordance" } tuner_config@config[["metric"]] <- metric } if (is.null(maximize)) { maximize <- metric %in% c("Accuracy", "Balanced_Accuracy", "Concordance", "Rsq", "r") tuner_config@config[["maximize"]] <- maximize } select_fn <- if (maximize) which.max else which.min verb <- if (maximize) "maximize" else "minimize" # Aggregate ---- # Average test errors # if using mirai, wait for all to finish if (backend == "mirai") { # Appease R CMD check .progress <- NULL grid_run <- grid_run[.progress] # grid_run <- mirai::collect_mirai(grid_run) } if (type %in% c("Regression", "Survival")) { metrics_training_all <- as.data.table(t(sapply( grid_run, function(r) unlist(r[["metrics_training"]]@metrics) ))) metrics_validation_all <- as.data.table(t(sapply( grid_run, function(r) unlist(r[["metrics_validation"]]@metrics) ))) } else if (type == "Classification") { metrics_training_all <- as.data.table(t(sapply( grid_run, function(r) unlist(r[["metrics_training"]]@metrics[["Overall"]]) ))) metrics_validation_all <- as.data.table(t(sapply( grid_run, function(r) unlist(r[["metrics_validation"]]@metrics[["Overall"]]) ))) } # appease R CMD check param_combo_id <- NULL metrics_validation_all[, param_combo_id := rep( seq_len(n_param_combinations), each = n_resamples ) ] metrics_training_all[, param_combo_id := rep( seq_len(n_param_combinations), each = n_resamples ) ] metrics_training_by_combo_id <- metrics_training_all[, lapply( .SD, get(tuner_config[["metrics_aggregate_fn"]]) ), by = param_combo_id ] metrics_validation_by_combo_id <- metrics_validation_all[, lapply( .SD, get(tuner_config[["metrics_aggregate_fn"]]) ), by = param_combo_id ] tune_results <- list( param_grid = param_grid, metrics_training = metrics_training_by_combo_id, metrics_validation = metrics_validation_by_combo_id ) # Algorithm-specific collection ---- # N of iterations is the one hyperparameter that may be determined # automatically, we therefore need to extract it and average it ## GLMNET ---- if (algorithm == "GLMNET") { if (is.null(grid_params[["lambda"]])) { # if lambda was NULL, cv.glmnet was run and optimal lambda was estimated # For each i in grid_run, get grid_run[[i]]$hyperparameters[[grid_run[[i]]$hyperparameters$which_lambda_cv]] if (verbosity > 1L) { msg_info("Extracting best lambda from GLMNET models...") } lambda_cv2 <- data.table( lambda = sapply( grid_run, function(x) { x[["hyperparameters"]][[x[["hyperparameters"]][[ "which_lambda_cv" ]]]] } ) ) lambda_cv2[, param_combo_id := rep( seq_len(n_param_combinations), each = n_resamples ) ] lambda_by_param_combo_id <- lambda_cv2[, lapply(.SD, get(tuner_config[["metrics_aggregate_fn"]])), by = param_combo_id ] # Replace NULL lambda in tune_results$param_grid with average value of CV-squared lambda stopifnot(tune_results[["param_grid"]][["lambda"]] == "null") param_grid[["lambda"]] <- tune_results[["param_grid"]][[ "lambda" ]] <- lambda_by_param_combo_id[["lambda"]] } } # /GLMNET ## LightGBM ---- if (algorithm == "LightGBM") { if (is.null(grid_params[["nrounds"]])) { if (verbosity > 1L) { msg_info("Extracting best N of iterations from LightGBM models...") } nrounds_cv <- data.table( nrounds = sapply(grid_run, \(x) x[["hyperparameters"]][["best_iter"]]) ) nrounds_cv[["param_combo_id"]] <- rep( seq_len(n_param_combinations), each = n_resamples ) nrounds_by_param_combo_id <- nrounds_cv[, lapply(.SD, get(tuner_config[["metrics_aggregate_fn"]])), by = param_combo_id ] # Replace NULL nrounds in tune_results$param_grid with average value of Res nrounds stopifnot(tune_results[["param_grid"]][["nrounds"]] == "null") param_grid[["nrounds"]] <- tune_results[["param_grid"]][["nrounds"]] <- as.integer(round(nrounds_by_param_combo_id[["nrounds"]])) } } # /LightGBM ## GBM, H2OGBM ---- # if (algorithm %in% c("H2OGBM", "GBM", "GBM3")) { # est.n.trees.all <- data.frame(n.trees = plyr::laply( # grid_run, # function(x) x$est.n.trees # )) # est.n.trees.all$param_combo_id <- rep(seq_len(n_param_combinations), each = n_resamples) # est.n.trees.by.param_combo_id <- aggregate( # n.trees ~ param_combo_id, est.n.trees.all, # metrics_aggregate_fn # ) # tune_results <- cbind( # n.trees = round(est.n.trees.by.param_combo_id$n.trees), # tune_results # ) # n_params <- n_params + 1 # } # /GBM, H2OGBM ## XGBoost ---- # if (algorithm == "XGBoost") { # if (verbosity > 1L) { # msg(highlight("Extracting best N of iterations from XGBoost models...")) # } # est.nrounds.all <- data.frame(nrounds = plyr::laply( # grid_run, # \(m) m$best_iteration # )) # est.nrounds.all$param_combo_id <- rep(seq_len(n_param_combinations), # each = n_resamples # ) # est.nrounds.by.param_combo_id <- aggregate( # nrounds ~ param_combo_id, est.nrounds.all, # metrics_aggregate_fn # ) # tune_results <- cbind( # nrounds = round(est.nrounds.by.param_combo_id$nrounds), # tune_results # ) # n_params <- n_params + 1 # } /XGBoost ## LINAD ---- # if (algorithm %in% c("LINAD", "LINOA")) { # if (verbosity > 1L) { # msg_info("Extracting best N leaves from LINAD models...") # } # est.n.leaves.all <- data.frame(n.leaves = plyr::laply( # grid_run, # \(x) ifelse(length(x$est.n.leaves) == 0, 1, x$est.n.leaves) # )) # est.n.leaves.all$param_combo_id <- rep(seq_len(n_param_combinations), # each = n_resamples # ) # est.n.leaves.by.param_combo_id <- aggregate( # n.leaves ~ param_combo_id, est.n.leaves.all, # metrics_aggregate_fn # ) # tune_results <- cbind( # n.leaves = # round(est.n.leaves.by.param_combo_id$n.leaves), tune_results # ) # n_params <- n_params + 1 # } # /LINAD, LINOA ## LIHADBoost ---- # if (algorithm == "LIHADBoost") { # if (verbosity > 1L) { # msg(highlight("Extracting best N steps from LIHADBoost models...")) # } # est.n.steps.all <- data.frame(n.steps = plyr::laply( # grid_run, # \(x) x$sel.n.steps # )) # est.n.steps.all$param_combo_id <- rep(seq_len(n_param_combinations), # each = n_resamples # ) # est.n.steps.by.param_combo_id <- aggregate( # n.steps ~ param_combo_id, est.n.steps.all, # metrics_aggregate_fn # ) # tune_results <- cbind( # n.steps = round(est.n.steps.by.param_combo_id$n.steps), # tune_results # ) # n_params <- n_params + 1 # } # /LIHADBoost # Consider explicitly sorting hyperparam values in increasing order, # so that in case of tie, lowest value is chosen - # if that makes sense, e.g. n.leaves, etc. best_param_combo_id <- as.integer( tune_results[["metrics_validation"]][ select_fn(tune_results[["metrics_validation"]][[metric]]), 1 ] ) best_param_combo <- as.list(param_grid[best_param_combo_id, -1, drop = FALSE]) if (verbosity > 0L) { msg( paste0("Best config to ", paste(verb, metric), ":") ) print_tune_finding(grid_params, best_param_combo) } # Outro ---- # Since this is always called from within `train()`, we don't want to print "Completed..." outro(start_time, verbosity = verbosity - 1L) if (verbosity > 0L) { msg( fmt("", col = col_tuner, bold = TRUE), "Tuning done." ) } # => add optional mods field to GridSearch # if (save_mods) mods <- grid_run GridSearch( hyperparameters = hyperparameters, tuner_config = tuner_config, tuning_results = list( param_grid = param_grid, training = metrics_training_by_combo_id, validation = metrics_validation_by_combo_id ), best_hyperparameters = best_param_combo ) } # /rtemis::tune_GridSearch # %% print_tune_finding ---- #' Print tuning results #' #' Prints set of search values and best value in the form {1, 3, 5} => 3 #' for each hyperparameter that was tuned. #' #' @author EDG #' @keywords internal #' @noRd print_tune_finding <- function(grid_params, best_param_combo, pad = 22L) { # Make list of search values and best value tfl <- lapply(seq_along(grid_params), function(i) { paste0( "{", paste(grid_params[[i]], collapse = ", "), "}", " => ", bold(best_param_combo[[names(grid_params)[i]]]) ) }) names(tfl) <- names(grid_params) # Capture output to sync with msg stream (stderr) out <- utils::capture.output(printls(tfl, print_class = FALSE, pad = pad)) message(paste(out, collapse = "\n")) } # /rtemis::print_tune_finding ================================================ FILE: R/utils.R ================================================ # utils.R # ::rtemis:: # 2016- EDG rtemis.org #' Print range of continuous variable #' #' @param x Numeric vector #' @param ddSci Logical: If TRUE, use [ddSci] or range. #' @param decimal_places Integer: Number of decimal place to use if `ddSci = TRUE`. #' @param na.rm Logical: passed to `base::range` #' #' @return Called for its side effect of printing the range of `x`. #' #' @author EDG #' @keywords internal #' @noRd show_range <- function(x, ddSci = TRUE, decimal_places = 1, na.rm = TRUE) { if (ddSci) { paste( ddSci(range(x, na.rm = na.rm), decimal_places = decimal_places), collapse = " to " ) } else { paste(range(x, na.rm = na.rm), collapse = " to ") } } # /rtemis::show_range #' Set Dynamic Range #' #' `rtemis preproc`: Adjusts the dynamic range of a vector or matrix input. #' By default normalizes to 0-1 range. #' #' @param x Numeric vector or matrix / data frame: Input #' @param lo Target range minimum. Defaults to 0 #' @param hi Target range maximum. Defaults to 1 #' @param byCol Logical: If TRUE: if `x` is matrix, `drange` each #' column separately #' #' @return Numeric vector. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' x <- runif(20, -10, 10) #' x <- drange(x) drange <- function(x, lo = 0, hi = 1, byCol = TRUE) { dr <- function(x, lo, hi) { .min <- min(x, na.rm = TRUE) (x - .min) / max(x - .min, na.rm = TRUE) * (hi - lo) + lo } if (NCOL(x) > 1) { if (byCol) { apply(x, 2, function(x) dr(x, lo, hi)) } else { dr(x, lo, hi) } } else { dr(x, lo, hi) } } # /rtemis::drange #' Factor NA to "missing" level #' #' Set NA values of a factor vector to a new level indicating missingness #' #' @param x Factor. #' @param na_level_name Character: Name of new level to create that will be assigned to all current #' NA values in `x`. #' #' @return factor. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' x <- factor(sample(letters[1:3], 100, TRUE)) #' x[sample(1:100, 10)] <- NA #' xm <- factor_NA2missing(x) factor_NA2missing <- function(x, na_level_name = "missing") { check_inherits(x, "factor") if (anyNA(x)) { x <- factor(x, levels = c(levels(x), na_level_name)) x[is.na(x)] <- na_level_name x } else { x } } # /rtemis::factor_NA2missing #' Filter order #' #' @param x Input vector #' @param idl Logical vector: Index of elements to filter #' @param decreasing Logical: If TRUE, sort in descending order #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' x <- rnorm(10) #' x #' x[filter_order(x, x < 0)] filter_order <- function(x, idl, decreasing = FALSE) { idi <- which(idl) flt_ord <- order(x[idi], decreasing = decreasing) idi[flt_ord] } #' Get the mode of a factor or integer #' #' Returns the mode of a factor or integer #' #' @param x Vector, factor or integer: Input data. #' @param na.rm Logical: If TRUE, exclude NAs (using `na.exclude(x)`). #' @param getlast Logical: If TRUE, get the last value in case of ties. #' @param retain_class Logical: If TRUE, output is always same class as input. #' #' @return The mode of `x` #' #' @author EDG #' @export #' #' @examples #' x <- c(9, 3, 4, 4, 0, 2, 2, NA) #' get_mode(x) #' x <- c(9, 3, 2, 2, 0, 4, 4, NA) #' get_mode(x) #' get_mode(x, getlast = FALSE) get_mode <- function( x, na.rm = TRUE, getlast = TRUE, retain_class = TRUE ) { if (retain_class) { .class <- class(x) } if (na.rm) { x <- na.exclude(x) } freq <- table(x) if (sum(freq) > 0) { if (getlast) { .vals <- unique(x) out <- .vals[rev(which(.vals %in% names(freq)[which(freq == max(freq))]))[ 1 ]] } else { out <- names(freq)[which.max(freq)] } if (length(out) == 0) out <- NA } else { out <- NA } if (retain_class) { if (is.factor(x)) { out <- factor(out, levels = levels(x)) } else { class(out) <- .class } } out } # /rtemis::get_mode #' Check if vector is constant #' #' @param x Vector: Input #' @param skip_missing Logical: If TRUE, skip NA values before test #' #' @return Logical. #' #' @author EDG #' @export #' #' @examples #' x <- rep(9, 1000000) #' is_constant(x) #' x[10] <- NA #' is_constant(x) #' is_constant(x, skip_missing = TRUE) is_constant <- function(x, skip_missing = FALSE) { # all(duplicated(x)[-1L]) if (skip_missing) { x <- na.exclude(x) } isTRUE(all(x == x[1])) } # /rtemis::is_constant #' Check if variable is discrete (factor or integer) #' #' @param x Input #' #' @return Logical. #' #' @author EDG #' @keywords internal #' @noRd is_discrete <- function(x) { is.factor(x) || is.integer(x) || is.logical(x) || is.character(x) } # /rtemis::is_discrete #' Return object if it has length > 0 #' #' Returns the input object if it has length > 0, else NULL #' #' @param x Object #' #' @return `x` if `length(x) > 0`, else `NULL` #' #' @keywords internal #' @noRd iflengthy <- function(x) { if (length(x) > 0) x else NULL } # /rtemis::iflengthy #' @keywords internal #' @noRd pval_stars <- function(x) { cut(x, breaks = c(0, .001, .01, .05, 1), labels = c("***", "**", "*", "")) } #' Format singular/plural noun #' #' @keywords internal #' @noRd #' #' @examples #' singorplu(0, "cat") #' singorplu(1, "cat") #' singorplu(2, "cat") singorplu <- function(n, x) { switch( as.character(n), `0` = paste0("no ", x, "s"), `1` = paste("1", x), paste0(n, " ", x, "s") ) } #' Size of object #' #' Returns the size of an object #' #' @details #' If `dim(x)` is NULL, returns `length(x)`. #' @param x any object with `length()` or `dim()`. #' @param verbosity Integer: Verbosity level. If > 0, print size to console #' #' @return Integer vector with length equal to the number of dimensions of `x`, invisibly. #' #' @author EDG #' @export #' #' @examples #' x <- rnorm(20) #' size(x) #' # 20 #' x <- matrix(rnorm(100), 20, 5) #' size(x) #' # 20 5 size <- function(x, verbosity = 1L) { z <- if (is.null(dim(x))) { length(x) } else { dim(x) } if (verbosity > 0L) { # Format to add "," for thousands z_formatted <- format(z, trim = TRUE, big.mark = ",", scientific = FALSE) cat(paste(bold(z_formatted), collapse = gray(" x ")), "\n") } invisible(z) } # /rtemis::size #' Recycle values of vector to match length of target. #' #' @details #' If `target` is longer than `x`, the values of `x` will be recycled to match the length of #' `target`. If `x` is longer than `target`, the values of `x` will be truncated to match the #' length of `target`. #' Used internally by many functions. #' #' @param x Vector to be recycled #' @param target Object whose length defines target length #' #' @return Vector. #' #' @author EDG #' @keywords internal #' @noRd recycle <- function(x, target) { lenx <- length(x) lent <- length(target) rep(x, ceiling(lent / lenx))[seq_len(lent)] } # /rtemis::recycle #' Random Normal Matrix #' #' Create a matrix or data frame of defined dimensions, whose columns are random normal vectors #' #' @param nrow Integer: Number of rows. #' @param ncol Integer: Number of columns. #' @param mean Float: Mean. #' @param sd Float: Standard deviation. #' @param return_df Logical: If TRUE, return data.frame, otherwise matrix. #' @param seed Integer: Set seed for `rnorm`. #' #' @return `matrix` or `data.frame`. #' #' @author EDG #' @export #' #' @examples #' x <- rnormmat(20, 5, mean = 12, sd = 6, return_df = TRUE, seed = 2026) #' x rnormmat <- function( nrow = 10, ncol = 10, mean = 0, sd = 1, return_df = FALSE, seed = NULL ) { if (length(mean) != ncol) { mean <- rep_len(mean, ncol) } if (length(sd) != ncol) { sd <- rep_len(sd, ncol) } if (!is.null(seed)) { set.seed(seed) } mat <- sapply(seq_len(ncol), function(j) { rnorm(nrow, mean = mean[j], sd = sd[j]) }) if (return_df) { mat <- as.data.frame(mat) } mat } # /rtemis::rnormmat #' Random Uniform Matrix #' #' Create a matrix or data frame of defined dimensions, whose columns are random uniform vectors #' #' @param nrow Integer: Number of rows. #' @param ncol Integer: Number of columns. #' @param min Float: Min. #' @param max Float: Max. #' @param return_df Logical: If TRUE, return data.frame, otherwise matrix. #' @param seed Integer: Set seed for `rnorm`. #' #' @return `matrix` or `data.frame`. #' #' @author EDG #' @export #' #' @examples #' x <- runifmat(20, 5, min = 12, max = 18, return_df = TRUE, seed = 2026) #' x runifmat <- function( nrow = 10, ncol = 10, min = 0, max = 1, return_df = FALSE, seed = NULL ) { if (length(min) < ncol) { min <- rep(min, ncol / length(min)) } if (length(max) < ncol) { max <- rep(max, ncol / length(max)) } if (!is.null(seed)) { set.seed(seed) } mat <- sapply(seq_len(ncol), function(j) runif(nrow, min = min, max = max)) if (return_df) { mat <- as.data.frame(mat) } mat } # /rtemis::runifmat #' Get rtemis version and system info #' #' @return List: rtemis version and system info, invisibly. #' #' @author EDG #' @export #' #' @examples #' rtversion() rtversion <- function() { out <- c( list(rtemis_version = as.character(packageVersion("rtemis"))), as.list(Sys.info()) ) printls(out, print_class = FALSE) invisible(out) } # /rtemis::rtversion #' Symmetric Set Difference #' #' @param x vector #' @param y vector of same type as `x` #' #' @return Vector. #' #' @author EDG #' @export #' #' @examples #' setdiff(1:10, 1:5) #' setdiff(1:5, 1:10) #' setdiffsym(1:10, 1:5) #' setdiffsym(1:5, 1:10) setdiffsym <- function(x, y) { union(setdiff(x, y), setdiff(y, x)) } # /rtemis::setdiffsym #' Initialize Project Directory #' #' Initializes Directory Structure: "R", "Data", "Results" #' #' @param path Character: Path to initialize project directory in. #' @param output_dir Character: Name of output directory to create. #' @param verbosity Integer: Verbosity level. #' #' @return Character: the path where the project directory was initialized, invisibly. #' #' @author EDG #' @export #' #' @examples #' \dontrun{ #' # Will create "my_project" directory with #' init_project_dir("my_project") #' } init_project_dir <- function(path, output_dir = "Out", verbosity = 1L) { if (verbosity > 0L) { msg("Initializing project directory...") } path <- normalizePath(path, mustWork = FALSE) # Create project directory if it doesn't exist if (!dir.exists(path)) { if (verbosity > 0L) { cat(" > Creating ", bold(path), " folder...", sep = "") } dir.create(path, recursive = TRUE) if (dir.exists(path)) { if (verbosity > 0L) { yay() } } else { if (verbosity > 0L) { nay() } cli::cli_abort( "Failed to create project directory at {.file {path}}. Check path & permissions." ) } } # Log file: rtemis_init.log ---- logfile_path <- file.path(path, "rtemis_init.log") sink(file = logfile_path, append = TRUE) cat("\n") cat("Initialized: ", datetime(), "\n", sep = "") cat("--------------------------------\n") print(sessionInfo()) sink() # Directories: /R /Data /output_dir ---- dirs <- file.path(path, c("R", "Data", output_dir)) for (i in dirs) { if (verbosity > 0L) { cat(" > Creating ", bold(i), " folder...", sep = "") } if (!dir.exists(i)) { dir.create(i) if (dir.exists(i)) { if (verbosity > 0L) yay() } else { if (verbosity > 0L) nay() } } else { if (verbosity > 0L) cat(orange(" Already present\n", bold = TRUE)) } } if (verbosity > 0L) { msg("Done.") } invisible(path) } # /rtemis::init_project_dir ================================================ FILE: R/utils_art.R ================================================ # utils_art.R # ::rtemis:: # 2025- EDG rtemis.org #' Color columns of text art #' #' This function accepts text input of 1 or more lines and two colors. #' It will: #' a) generate a color gradient between the two colors #' b) apply the gradient to each column of the text, creating a left to right color gradient. #' #' @param x Character vector of text to colorize. #' @param color_left Color for the left side of the gradient. #' @param color_right Color for the right side of the gradient. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character vector with color formatting applied to each column. #' #' @author EDG #' @keywords internal #' @noRd color_txt_columns <- function( x, color_left, color_right, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) # Count number of columns in input text ncols <- max(nchar(x, type = "width")) if (ncols == 0) { return(x) } # Create color gradient from color_left to color_right with ncols steps gradient <- grDevices::colorRampPalette(c(color_left, color_right))(ncols) # Apply the colors to each column of the text result <- character(length(x)) for (i in seq_along(x)) { line <- x[i] line_chars <- strsplit(line, "")[[1]] line_width <- nchar(line, type = "width") if (line_width == 0) { result[i] <- line next } colored_chars <- character(length(line_chars)) for (j in seq_along(line_chars)) { char <- line_chars[j] if (char == " ") { colored_chars[j] <- char } else { # Use column position for gradient color col_pos <- min(j, ncols) colored_chars[j] <- fmt( char, col = gradient[col_pos], output_type = output_type ) } } result[i] <- paste0(colored_chars, collapse = "") } result } # /rtemis::color_txt_columns #' Color rows of text art #' #' This function accepts text input of 1 or more lines and two colors. #' It will: #' a) generate a color gradient between the two colors #' b) apply the gradient to each row of the text, creating a top to bottom color gradient. #' #' @param x Character vector of text to colorize. #' @param color_top Color for the top of the gradient. #' @param color_bottom Color for the bottom of the gradient. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character vector with color formatting applied to each row. #' #' @author EDG #' @keywords internal #' @noRd color_txt_rows <- function( x, color_top, color_bottom, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) # Number of rows nrows <- length(x) if (nrows == 0) { return(x) } # Create color gradient from color_top to color_bottom with nrows steps gradient <- grDevices::colorRampPalette(c(color_top, color_bottom))(nrows) # Apply the colors to each row of the text result <- character(nrows) for (i in seq_along(x)) { line <- x[i] line_chars <- strsplit(line, "")[[1]] line_width <- nchar(line, type = "width") if (line_width == 0) { result[i] <- line next } colored_chars <- character(length(line_chars)) for (j in seq_along(line_chars)) { char <- line_chars[j] if (char == " ") { colored_chars[j] <- char } else { colored_chars[j] <- fmt( char, col = gradient[i], output_type = output_type ) } } result[i] <- paste0(colored_chars, collapse = "") } result } # /rtemis::color_txt_rows #' pkglogo #' #' @param pkg Character: Package name. #' @param filename Character: Filename of the logo file (without path). #' @param fmt_fn Function: "color_txt_columns", "color_text_rows" Formatting function to apply to #' the logo text. #' @param args List: Arguments to pass to `fmt_fn`. #' @param pad Integer: Left-pad output with this many spaces. #' #' @return Character: Formatted logo text. #' #' @author EDG #' @keywords internal #' @noRd pkglogo <- function( pkg = .packageName, filename = paste0(pkg, ".utf8"), fmt_fn = color_txt_columns, args = list( color_left = kaimana_red, color_right = coastside_orange, output_type = "ansi" ), pad = 2L ) { logo_file <- system.file( package = .packageName, "resources", filename ) logo_txt <- readLines(logo_file) paste0( strrep(" ", pad), do.call(fmt_fn, c(list(x = logo_txt), args)), collapse = "\n" ) } # /rtemis::pkglogo #' Show colors #' #' Display color previews with ANSI color blocks #' #' @param x Named vector or list of colors to preview. #' @param pad Integer: Pad output with this many spaces. #' @param center_title Logical: If TRUE, autopad title for centering, if present. #' @param title Character: Optional title to display. #' @param title_newline Logical: If TRUE, add newline after title. #' @param limit Integer: Maximum number of colors to show. Set to -1L for no limit. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted string that can be printed with cat() #' #' @author EDG #' @keywords internal #' @noRd show_col <- function( x, pad = 2L, center_title = TRUE, title = NULL, title_newline = TRUE, limit = 12L, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) # Helper function to build padded string equivalent of padcat build_padcat <- function( text, pad = 2L, newline_pre = FALSE, newline = FALSE ) { result <- "" if (newline_pre) { result <- paste0(result, "\n") } result <- paste0(result, strrep(" ", pad)) result <- paste0(result, text) if (newline) { result <- paste0(result, "\n") } result } # Initialize output string result <- "" if (is.null(x)) { if (!is.null(title)) { result <- paste0( result, highlight(build_padcat(title, pad = pad, newline = title_newline)) ) } result <- paste0(result, strrep(" ", pad), "NULL") return(result) } if (length(x) == 0) { result <- paste0(result, class(x), " of length 0.\n") return(result) } # Convert to named list if needed if (is.null(names(x)) && !is.list(x)) { names(x) <- as.character(x) } x <- as.list(x) xnames <- names(x) if (is.null(xnames)) { xnames <- paste0("color_", seq_along(x)) } # Calculate left-hand side width lhs <- max(nchar(xnames)) + pad # Add title if provided if (!is.null(title)) { title_pad <- if (center_title) { max(0, lhs - round((.5 * nchar(title))) - 3) } else { 0 } result <- paste0( result, highlight(build_padcat(title, pad = title_pad, newline = title_newline)), "\n" ) } # Show limit message if needed counter <- 0L if (limit != -1L && length(x) > limit) { limit_text <- paste0( italic( gray( paste0( "Showing first ", limit, " of ", length(x), " colors.\n" ), output_type = output_type ), output_type = output_type ) ) result <- paste0(result, build_padcat(limit_text, pad = pad)) } # Display each color for (i in seq_along(x)) { counter <- counter + 1L if (limit != -1L && counter > limit) { more_text <- paste0( italic( gray( paste0( "...", length(x) - limit, " more colors not shown.\n" ) ), output_type = output_type ) ) result <- paste0(result, build_padcat(more_text, pad = pad)) break } # Get color value color_val <- x[[i]] # Create color blocks: 2 solid, 2 medium, 2 light if (output_type == "ansi") { # Use the color directly color_display <- tryCatch( { # Create blocks with varying intensities solid_block <- fmt( "\u2588", col = color_val, output_type = output_type ) medium_block <- fmt( "\u2593", col = color_val, output_type = output_type ) light_block <- fmt( "\u2591", col = color_val, output_type = output_type ) paste0( solid_block, solid_block, medium_block, medium_block, light_block, light_block ) }, error = function(e) { # Fallback if color conversion fails paste0( "\u2588\u2588\u2593\u2593\u2591\u2591 (", color_val, ")" ) } ) } else { # For non-ANSI output, just show the color value color_display <- paste0( "\u2588\u2588\u2593\u2593\u2591\u2591 (", color_val, ")" ) } # Format and add the line item_text <- paste0( bold( format( xnames[i], width = lhs, justify = "right" ), output_type = output_type ), ": ", color_display, "\n" ) result <- paste0(result, item_text) } result } # /rtemis::show_col ================================================ FILE: R/utils_async.R ================================================ # utils_async.R # ::rtemis:: # 2026- EDG rtemis.org # Define allowed future plans ALLOWED_PLANS <- c( "sequential", "multicore", "multisession", "cluster", "remote", "transparent", "future.mirai::mirai_multisession", # what user sets "mirai_multisession" # what future::plan() returns ) #' Check if system is Windows #' #' @return Logical: TRUE if Windows, FALSE otherwise #' @noRd is_windows <- function() { tolower(Sys.info()[["sysname"]]) == "windows" } # /is_windows #' Identify future plan #' #' @return Character: Name of current plan #' #' @noRd identify_plan <- function(x = NULL) { if (is.null(x)) { x <- future::plan() } for (p in ALLOWED_PLANS) { if (inherits(x, p)) { return(p) } } cli::cli_abort( "Detected future plan not in allowed plans ({.val {ALLOWED_PLANS}}). Detected plan class: {.val {class(x)}}" ) } # /rtemis::identify_plan #' Set preferred plan #' #' Sets the future plan according to system and user preference: #' - Check whether a plan has been set by the user #' - Check whether there is an option set for future plan #' - Check available cores #' - Check if Windows #' #' @param requested_plan Optional character: Requested plan, one of "multicore", "multisession", "sequential". #' @param n_workers Optional integer: Number of workers to use. #' #' @return Character: Name of plan set #' #' @author EDG #' @keywords internal #' @noRd set_preferred_plan <- function( requested_plan = NULL, n_workers = NULL, envir = parent.frame(), verbosity = 1L ) { # If user has requested a specific plan, try to set it if (!is.null(requested_plan)) { # Security check if (!requested_plan %in% ALLOWED_PLANS) { cli::cli_abort( "Requested plan {.val {requested_plan}} is not one of allowed plans: {.val {ALLOWED_PLANS}}" ) } # future::plan will determine workers if NULL & will set to sequential if only 1 core available # therefore plan set by following call is not always the requested one and needs to be # determined. if (requested_plan == "sequential") { with( future::plan(strategy = requested_plan), local = TRUE, envir = envir ) } else { with( future::plan(strategy = requested_plan, workers = n_workers), local = TRUE, envir = envir ) } return(identify_plan()) } # If user has not requested a specific plan, check if they have set one current_plan <- future::plan() # If the plan is not sequential, we must assume user set it and respect it (though it might # have been set by a different package) if (!inherits(current_plan, "sequential")) { return(identify_plan(current_plan)) } # If the plan is sequential, we can't currently tell if it was set by the user or is default # -> Ideally, we would know this. <- # We therefore proceed to set our preferred plan based on OS, n available cores, and requested # n workers. # If n_workers was set to 1 and no requested_plan was defined, use sequential if (!is.null(n_workers) && n_workers == 1L) { with( future::plan(strategy = "sequential"), local = TRUE, envir = envir ) return("sequential") } if (is_windows()) { # On Windows, multicore is not available preferred_plan <- "multisession" } else { preferred_plan <- "multicore" } with( future::plan(strategy = preferred_plan, workers = n_workers), local = TRUE, envir = envir ) # This will still be sequential and not "preferred_plan" if n_workers = 1 identify_plan() } # /set_preferred_plan ================================================ FILE: R/utils_checks.R ================================================ # utils_checks.R # ::rtemis:: # 2024- EDG rtemis.org # clean_* functions performm checks and return clean inputs. # check_* functions perform checks (do not return a value). # %% test_inherits ---- #' Check class of object #' #' @param x Object to check #' @param cl Character: class to check against #' #' @return Logical #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' test_inherits("papaya", "character") # TRUE #' test_inherits(c(1, 2.5, 3.2), "integer") #' test_inherits(iris, "list") # FALSE, compare to is_check(iris, is.list) test_inherits <- function(x, cl) { if (!inherits(x, cl)) { input <- deparse(substitute(x)) message(red(bold(input), "is not", bold(cl))) return(FALSE) } TRUE } # /rtemis::test_inherits # %% check_inherits ---- #' Check class of object #' #' @param x Object to check. #' @param cl Character: class to check against. #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' #' @keywords internal #' @noRd #' #' @examples #' check_inherits("papaya", "character") #' # These will throw errors: #' # check_inherits(c(1, 2.5, 3.2), "integer") #' # check_inherits(iris, "list") check_inherits <- function( x, cl, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!inherits(x, cl)) { cli::cli_abort( "{.var {xname}} must be of class {.cls {cl}}." ) } invisible() } # /rtemis::check_inherits # %% clean_int ---- #' Clean integer input #' #' @details #' The goal is to return an integer vector. #' If the input is integer, it is returned as is. #' If the input is numeric, it is coerced to integer only if the numeric values are integers, #' otherwise an error is thrown. #' #' @param x Double or integer vector to check. #' #' @return Integer vector #' @author EDG #' #' @keywords internal #' @noRd #' #' @examples #' clean_int(6L) #' clean_int(3) #' # clean_int(12.1) # Error #' clean_int(c(3, 5, 7)) #' # clean_int(c(3, 5, 7.01)) # Error clean_int <- function(x, xname = deparse(substitute(x))) { if (is.integer(x)) { return(x) } else if (is.numeric(x)) { if (all(x %% 1 == 0)) { return(as.integer(x)) } else { cli::cli_abort("{.var {xname}} must be integer.") } } else if (is.null(x)) { return(NULL) } cli::cli_abort("{.var {xname}} must be integer.") } # /rtemis::clean_int # %% match_arg ---- #' Match Arguments Ignoring Case #' #' @param x Character: Argument to match. #' @param choices Character vector: Choices to match against. #' #' @return Character: Matched argument. #' #' @author EDG #' #' @keywords internal #' @noRd #' #' @examples #' match_arg("papaya", c("AppleExtreme", "SuperBanana", "PapayaMaster")) match_arg <- function(x, choices) { out <- match.arg(tolower(x), tolower(choices)) grep(out, choices, value = TRUE, ignore.case = TRUE) } # /rtemis::match_arg # %% check_logical ---- #' Check logical #' #' @param x Vector to check #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' @author EDG #' #' @keywords internal #' @noRd check_logical <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (!is.logical(x)) { cli::cli_abort("{.var {xname}} must be logical.") } invisible() } # /rtemis::check_logical # %% check_character ---- #' Check character #' #' @param x Vector to check #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' @keywords internal #' @noRd check_character <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (!is.character(x)) { cli::cli_abort("{.var {xname}} must be character.") } invisible() } # /rtemis::check_character # %% check_floatpos ---- #' Check positive float #' #' @details #' Checking with `is.numeric()` allows integer inputs as well, which should be ok since it is #' unlikely the function that consumes this will enforce double type only, but instead is most #' likely to allow implicit coercion from integer to numeric. #' #' @param x Float vector. #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail, otherwise invisible(). #' #' @author EDG #' @keywords internal #' @noRd check_floatpos <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort("{.var {xname}} must be numeric.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x <= 0)) { cli::cli_abort("{.var {xname}} must be greater than 0.") } invisible() } # /rtemis::check_floatpos # %% check_float01exc ---- #' Check float between 0 and 1, exclusive #' #' @param x Vector to check #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' @keywords internal #' @noRd #' @examples #' check_float01exc(0.5) check_float01exc <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort("{.var {xname}} must be numeric.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x <= 0 | x >= 1)) { cli::cli_abort( "{.var {xname}} must be between 0 and 1, exclusive." ) } invisible() } # /rtemis::check_float01exc # %% check_float01inc ---- #' Check float between 0 and 1, inclusive #' #' @param x Float vector. #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' @keywords internal #' @noRd #' @examples #' check_float01inc(0.5) check_float01inc <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort( "{.var {xname}} must be numeric. Received: {.val {x}} of class {class(x)}", call. = FALSE ) } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x < 0 | x > 1)) { cli::cli_abort("{.var {xname}} must be between 0 and 1, inclusive.") } invisible() } # /rtemis::check_float01 # %% check_floatpos1 ---- check_floatpos1 <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort("{.var {xname}} must be numeric.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x <= 0) || any(x > 1)) { cli::cli_abort( "{.var {xname}} must be greater than 0 and less or equal to 1." ) } invisible() } # /rtemis::check_floatpos1 # %% clean_posint ---- #' Check positive integer #' #' @param x Integer vector. #' #' @return x, otherwise error. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' clean_posint(5) clean_posint <- function(x, allow_na = FALSE, xname = deparse(substitute(x))) { if (is.null(x)) { return(NULL) } if (!allow_na && anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } else { x <- na.exclude(x) } if (any(x <= 0)) { cli::cli_abort("{.var {xname}} must contain only positive integers.") } clean_int(x, xname = xname) } # /rtemis::clean_posint # %% check_float0pos ---- #' Check float greater than or equal to 0 #' #' Checks if an input is a numeric vector containing non-negative #' (>= 0) values and no `NA`s. It is designed to validate function arguments. #' #' @param x Numeric vector: The input object to check. #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' #' @keywords internal #' @noRd check_float0pos <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort("{.var {xname}} must be numeric.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x < 0)) { cli::cli_abort("{.var {xname}} must be zero or greater.") } invisible() } # /rtemis::check_float0pos # %% check_float_neg1_1 ---- #' Check float -1 <= x <= 1 #' #' @param x Numeric vector: The input object to check. #' @param allow_null Logical: If TRUE, NULL values are allowed and return early. #' #' @return Called for side effects. Throws an error if checks fail. #' #' @author EDG #' #' @keywords internal #' @noRd check_float_neg1_1 <- function( x, allow_null = TRUE, xname = deparse(substitute(x)) ) { if (allow_null && is.null(x)) { return(invisible()) } if (is.null(x)) { cli::cli_abort("{.var {xname}} cannot be NULL.") } if (!is.numeric(x)) { cli::cli_abort("{.var {xname}} must be numeric.") } if (anyNA(x)) { cli::cli_abort("{.var {xname}} must not contain NAs.") } if (any(x < -1 | x > 1)) { cli::cli_abort("{.var {xname}} must be between -1 and 1, inclusive.") } invisible() } # /rtemis::check_float_neg1_1 # %% abbreviate_class ---- #' Abbreviate object class name #' #' @param x Object #' #' @return Character: Abbreviated class #' #' @author EDG #' #' @keywords internal #' @noRd abbreviate_class <- function(x, n = 4L) { paste0("<", abbreviate(class(x)[1], minlength = n), ">") } # /rtemis::abbr_class # %% check_dependencies ---- #' \pkg{rtemis} internal: Dependencies check #' #' Checks if dependencies can be loaded; names missing dependencies if not. #' #' @param ... List or vector of strings defining namespaces to be checked #' @param verbosity Integer: Verbosity level. #' Note: An error will always printed if dependencies are missing. #' Setting this to FALSE stops it from printing #' "Dependencies check passed". #' #' @return Called for side effects. Aborts and prints list of missing dependencies, if any. #' #' @author EDG #' #' @keywords internal #' @noRd check_dependencies <- function(..., verbosity = 0L) { ns <- as.list(c(...)) err <- !sapply(ns, \(i) requireNamespace(i, quietly = TRUE)) if (any(err)) { cli::cli_abort( paste0( "Please install the following ", ngettext(sum(err), "dependency", "dependencies"), ":\n", pastels(ns[err], bullet = " -") ) ) } else { if (verbosity > 0L) msg("Dependency check passed") } invisible() } # /rtemis::check_dependencies # %% check_data.table ---- #' Check data.table #' #' @param x Object to check. #' #' @return Called for side effects. Throws an error if input is not a data.table, returns x #' invisibly otherwise. #' #' @author EDG #' @keywords internal #' @noRd check_data.table <- function(x, xname = deparse(substitute(x))) { if (!data.table::is.data.table(x)) { cli::cli_abort("{.var {xname}} must be a data.table.") } invisible(x) } # /rtemis::check_data.table # %% check_tabular ---- #' Check object is tabular #' #' Checks if object is of class `data.frame`, `data.table`, or `tbl_df`. #' #' @param x Object to check. #' #' @return Called for side effects. Throws an error if input is not tabular, returns x invisibly #' otherwise. #' #' @author EDG #' @keywords internal #' @noRd check_tabular <- function(x) { if (!inherits(x, c("data.frame", "data.table", "tbl_df"))) { cli::cli_abort( "{.var {deparse(substitute(x))}} must be a data.frame, data.table, or tbl_df." ) } invisible(x) } # /rtemis::check_tabular ================================================ FILE: R/utils_color.R ================================================ # utils_color.R # ::rtemis:: # 2016- EDG rtemis.org #' Simple Color Operations #' #' Invert a color or calculate the mean of two colors in HSV or RGB space. #' This may be useful in creating colors for plots #' #' The average of two colors in RGB space will often pass through gray, #' which is likely undesirable. Averaging in HSV space, better for most applications. #' @param col Input color(s) #' @param fn Character: "invert", "mean": Function to perform #' @param space Character: "HSV", "RGB": Colorspace to operate in - for #' averaging only #' #' @return Color #' #' @author EDG #' @keywords internal #' @noRd color_op <- function(col, fn = c("invert", "mean"), space = c("HSV", "RGB")) { # Arguments ---- fn <- match.arg(fn) space <- match.arg(space) # Colors ---- col <- as.list(col) col.rgb <- col2rgb(col, alpha = TRUE) if (fn == "invert") { inverted <- apply(col.rgb, 2, \(i) 255 - i) # maintain alpha inverted[4, ] <- col.rgb[4, ] invertedl <- lapply(seq_len(NCOL(inverted)), \(i) { rgb( inverted[1, i], inverted[2, i], inverted[3, i], inverted[4, i], maxColorValue = 255 ) }) if (!is.null(names(col))) { names(invertedl) <- paste0(names(col), ".invert") } return(invertedl) } else if (fn == "mean") { if (length(col) < 2) { cli::cli_abort("Need at least two colors to average") } if (space == "RGB") { averaged <- rowMeans(col.rgb) averaged <- rgb( averaged[1], averaged[2], averaged[3], averaged[4], maxColorValue = 255 ) return(list(average = averaged)) } else if (space == "HSV") { # Convert HSV to RGB col.hsv <- rgb2hsv(col.rgb[1:3, ]) # Get mean HSV values averaged <- rowMeans(col.hsv) # Get mean alpha from RGB alpha <- mean(col.rgb[4, ]) # Turn to hex averaged <- hsv(averaged[1], averaged[2], averaged[3], alpha / 255) return(averaged) } } } # /rtemis::color_op #' Color to Grayscale #' #' Convert a color to grayscale #' #' Uses the NTSC grayscale conversion: #' 0.299 * R + 0.587 * G + 0.114 * B #' #' @param x Color to convert to grayscale #' @param what Character: "color" returns a hexadecimal color, #' "decimal" returns a decimal between 0 and 1 #' #' @return Character: color hex code. #' #' @author EDG #' @export #' #' @examples #' col2grayscale("red") #' col2grayscale("red", "dec") col2grayscale <- function(x, what = c("color", "decimal")) { what <- match.arg(what) col <- col2rgb(x) gs <- (0.299 * col[1, ] + 0.587 * col[2, ] + 0.114 * col[3, ]) / 255 if (what == "color") { grDevices::gray(gs) } else { gs } } # /rtemis::col2grayscale #' Invert Color in RGB space #' #' @param x Color, vector #' #' @return Inverted colors using hexadecimal notation `#RRGGBBAA`. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' cols <- c("red", "green", "blue") #' previewcolor(cols) #' cols |> #' color_invertRGB() |> #' previewcolor() color_invertRGB <- function(x) { col <- as.list(x) col_rgb <- col2rgb(col, alpha = TRUE) inverted <- apply(col_rgb, 2, \(i) 255 - i) # maintain alpha inverted[4, ] <- col_rgb[4, ] invertedl <- sapply(seq_len(NCOL(inverted)), \(i) { rgb( inverted[1, i], inverted[2, i], inverted[3, i], inverted[4, i], maxColorValue = 255 ) }) if (!is.null(names(col))) { names(invertedl) <- paste0(names(col), ".invert") } invertedl } # /rtemis::color_invertRGB #' Fade color towards target #' #' @param x Color source #' @param to Target color #' @param pct Numeric (0, 1) fraction of the distance in RGBA space between #' `x` and `to` to move. e.g. .5 gets the mean RGBA value of the two #' #' @return Color in hex notation #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' previewcolor(c("red", color_fade("red", "blue", .5), "blue")) color_fade <- function(x, to = "#000000", pct = .5) { col <- col2rgb(x, alpha = TRUE) col2 <- col2rgb(to, alpha = TRUE) d <- (col2 - col) * pct colf <- (col + d) / 255 rgb(colf[1], colf[2], colf[3], colf[4]) } #' Pastelify a color (make a color more pastel) #' #' Lower a color's saturation by a given percent in the HSV color system #' #' @param x Color vector: Color(s) to operate on #' @param s Float: Decrease saturation by this fraction. For example, if `s = 0.3` and saturation of #' input color is 1, it will become 0.7. #' #' @return Character vector with hex codes of modified colors. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' cols <- c("red", "green", "blue") #' previewcolor(cols) #' cols_d <- desaturate(cols) #' previewcolor(cols_d) desaturate <- function(x, s = 0.3) { # Infer color names, if available if (!is.null(names(x))) { .names <- names(x) } else if (is.character(x)) { .names <- x } else { .names <- NULL } x <- lapply(x, col2rgb) x <- lapply(x, rgb2hsv) xp <- lapply(x, function(i) { .s <- i[2] i[2] <- .s - (.s * s) hsv(i[1], i[2], i[3]) }) names(xp) <- .names unlist(xp) } # /rtemis::desaturate #' Convert R color to hexadecimal code #' #' Convert a color that R understands into the corresponding hexadecimal code #' #' @param color Color(s) that R understands #' #' @return Character vector of hexadecimal codes. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' col2hex(c("gray50", "skyblue")) col2hex <- function(color) { .rgb <- col2rgb(color) sapply(seq_along(color), function(i) { paste0( "#", paste0( sprintf( "%02s", c( as.character(as.hexmode(.rgb[1, i])), as.character(as.hexmode(.rgb[2, i])), as.character(as.hexmode(.rgb[3, i])) ) ), collapse = "" ) ) }) } # /rtemis::col2hex #' Adjust HSV Color #' #' Modify alpha, hue, saturation and value (HSV) of a color #' #' @param color Input color. Any format that grDevices::col2rgb() recognizes #' @param alpha Numeric: Scale alpha by this amount. Future: replace with absolute setting #' @param hue Float: How much hue to add to `color` #' @param sat Float: How much saturation to add to `color` #' @param val Float: How much to increase value of `color` by #' #' @return Adjusted color #' #' @author EDG #' @export #' #' @examples #' previewcolor(c(teal = "#00ffff", teal50 = color_adjust("#00ffff", alpha = 0.5))) color_adjust <- function(color, alpha = NULL, hue = 0, sat = 0, val = 0) { ac <- color # HSV ---- ac.hsv <- grDevices::rgb2hsv(grDevices::col2rgb(ac)) ac <- grDevices::hsv(ac.hsv[1] + hue, ac.hsv[2] + sat, ac.hsv[3] + val) # alpha ---- if (!is.null(alpha)) { ac <- adjustcolor(ac, alpha.f = alpha) } ac } # /rtemis::color_adjust #' Preview color #' #' Preview one or multiple colors using little rhombi with their little labels up top #' #' @param x Color, vector: One or more colors that R understands #' @param main Character: Title. Default = NULL, which results in #' `deparse(substitute(x))` #' @param bg Background color. #' @param main_col Color: Title color #' @param main_x Float: x coordinate for `main`. #' @param main_y Float: y coordinate for `main`. #' @param main_adj Float: `adj` argument to mtext for `main`. #' @param main_cex Float: character expansion factor for `main`. #' @param main_font Integer, 1 or 2: Weight of `main` 1: regular, 2: bold. #' @param width Float: Plot width. Default = NULL, i.e. set automatically #' @param xlim Vector, length 2: x-axis limits. Default = NULL, i.e. set automatically #' @param ylim Vector, length 2: y-axis limits. #' @param asp Float: Plot aspect ratio. #' @param labels_y Float: y coord for labels. Default = 1.55 (rhombi are fixed and range y .5 - 1.5) #' @param label_cex Float: Character expansion for labels. Default = NULL, and is #' calculated automatically based on length of `x` #' @param mar Numeric vector, length 4: margin size. #' @param filename Character: Path to save plot as PDF. #' @param pdf_width Numeric: Width of PDF in inches. #' @param pdf_height Numeric: Height of PDF in inches. #' #' @return Nothing, prints plot. #' #' @author EDG #' @export #' #' @examples #' previewcolor(get_palette("rtms")) previewcolor <- function( x, main = NULL, bg = "#333333", main_col = "#b3b3b3", main_x = .7, main_y = 0.2, main_adj = 0, main_cex = .9, main_font = 2, width = NULL, xlim = NULL, ylim = c(0, 2.2), asp = 1, labels_y = 1.55, label_cex = NULL, mar = c(0, 0, 0, 1), filename = NULL, pdf_width = 8, pdf_height = 2.5 ) { if (is.null(main)) { main <- deparse(substitute(x)) } x <- unlist(x) par_orig <- par(no.readonly = TRUE) on.exit(par(par_orig)) if (is.null(width)) { width <- max(3, .3 * length(x)) } if (is.null(xlim)) { xlim <- c(0.3, width + .7) } if (!is.null(filename)) { grDevices::pdf(filename, pdf_width, pdf_height) } par(bg = bg, mar = mar, oma = c(0, 0, 0, 0), xaxs = "i", yaxs = "i") # Plot ---- plot( NULL, NULL, asp = asp, axes = FALSE, xlim = xlim, ylim = ylim, xlab = NA, ylab = NA ) if (length(x) >= 3) { xmid <- seq(1, width, length.out = length(x)) } else if (length(x) == 2) { xmid <- c(.3333 * width, .6666 * width) + .5 } else { xmid <- .5 * width + .5 } for (i in seq(x)) { rhombus(xmid[i], 1, col = x[i]) } # '- Labels ---- if (is.null(label_cex)) { label_cex <- 1.30 - .02 * length(x) label_cex <- 1.314869 - 0.009163 * length(x) } if (is.null(names(x))) { labels <- as.character(x) } else { labels <- names(x) } text( xmid + .1, labels_y, labels, col = x, srt = 45, adj = 0, offset = 0, cex = label_cex, xpd = TRUE ) # '- Title ---- if (!is.null(main)) { text( main_x, main_y, main, col = main_col, adj = main_adj, font = main_font, cex = main_cex ) } if (!is.null(filename)) { dev.off() } } # /rtemis::previewcolor rhombus <- function( xmid = 1, ymid = 1, width = 1, height = 1, col = "#80FFFF" ) { # left, top, right, bottom hw <- .5 * width hh <- .5 * height polygon( x = c(xmid - hw, xmid, xmid + hw, xmid), y = c(ymid, ymid + hh, ymid, ymid - hh), col = col, border = NA ) } # /rtemis::rhombus #' Color Gradient #' #' Create a gradient of colors and optionally a colorbar #' #' It is best to provide an odd number, so that there is always an equal number of colors on either side #' of the midpoint. #' For example, if you want a gradient from -1 to 1 or equivalent, an n = 11, will give 5 colors on either #' side of 0, each representing a 20\% change from the next. #' #' `colors` can be defined as a sequence of 3-letter color abbreviations of 2, 3, 4, or 5 colors #' which will correspond to values: \{"lo","hi"\}; \{"lo", "mid", "hi"\}; \{"lo", "mid", "midhi", "hi"\}, and #' \{"lo", "lomid", "mid", "midhi", "hi"\}, respectively. #' For example, try `colorgrad(21, "blugrnblkredyel", colorbar = TRUE)` #' 3-letter color abbreviations: #' wht: white; blk: black; red; grn: green; blu: blue; yel: yellow; rng: orange; prl: purple #' #' @param n Integer: How many distinct colors you want. If not odd, converted to `n + 1` #' Defaults to 21 #' @param colors Character: Acts as a shortcut to defining `lo`, `mid`, etc for a number of defaults: #' "french", "penn", "grnblkred", #' @param space Character: Which colorspace to use. Option: "rgb", or "Lab". #' Recommendation: If `mid` is "white" or "black" (default), use "rgb", otherwise "Lab" #' @param lo Color for low end #' @param lomid Color for low-mid #' @param mid Color for middle of the range or "mean", which will result in `color_op(c(lo, hi), "mean")`. #' If `mid = NA`, then only `lo` and `hi` are used to create the color gradient. #' @param midhi Color for middle-high #' @param hi Color for high end #' @param preview Logical: Plot the colors horizontally #' @param cb_n Integer: How many steps you would like in the colorbar #' @param bar_min Numeric: Lowest value in colorbar #' @param bar_mid Numeric: Middle value in colorbar #' @param bar_max Numeric: Max value in colorbar #' @param cex Float: Character expansion for axis #' @param theme Theme object. #' @param bg Color: Background color #' @param col_text Color: Colorbar text color #' @param plotlycb Logical: Create colorbar using `plotly` (instead of base R graphics) #' @param plotly_width Float: Width for plotly colorbar. #' @param plotly_height Float: Height for plotly colorbar. #' @param return_plotly Logical: If TRUE, return `plotly` object #' @param margins Vector: Plotly margins. #' @param pad Float: Padding for `plotly`. #' #' @return Invisible vector of hexadecimal colors / plotly object if `return_plotly = TRUE` #' #' @author EDG #' @keywords internal #' @noRd colorgrad <- function( n = 21L, colors = NULL, space = c("rgb", "Lab"), lo = rt_teal, lomid = NULL, mid = NULL, midhi = NULL, hi = rt_orange, preview = FALSE, cb_n = 21L, bar_min = -1, bar_mid = 0, bar_max = 1, cex = 1.2, theme = choose_theme(getOption("rtemis_theme")), bg = NULL, col_text = NULL, plotlycb = FALSE, plotly_width = 80, plotly_height = 500, return_plotly = FALSE, margins = c(0, 0, 0, 0), pad = 0L ) { # Arguments ---- n <- as.integer(n) if (n %% 2 != 1) { n <- n + 1 } if (return_plotly) { plotlycb <- TRUE } if (is.null(cb_n)) { cb_n <- n if (cb_n %% 2 != 1) cb_n <- cb_n + 1 } space <- match.arg(space) theme <- if (strtrim(theme@name, 4) %in% c("dark", "blac")) { "dark" } else { "light" } # Colors ---- if (!is.null(colors)) { if (colors == "french") { lo <- "#01256E" lomid <- NULL mid <- "white" midhi <- NULL hi <- "#95001A" } else if (colors == "penn") { lo <- "#02CFFF" lomid <- NULL mid <- "#01256E" midhi <- "#95001A" hi <- "#F2C100" } else if (colors == "blues") { lo <- "#01256E" mid <- NULL hi <- "#82AFD3" } else if (colors == "greens") { lo <- "#005200" mid <- NULL hi <- "#80DF80" } else { cols <- colorvec(cols = colors) lo <- cols$lo lomid <- cols$lomid mid <- cols$mid midhi <- cols$midhi hi <- cols$hi } } # Grad ---- n <- as.integer(n) midpoint <- ceiling(n / 2) if (is.null(mid)) { mid <- ifelse(theme == "light", "white", "black") } if (!is.na(mid)) { if (mid == "mean") { mid <- color_op(c(lo, hi), "mean") } lo2mid <- colorRampPalette(c(lo, lomid, mid), space = space) mid2hi <- colorRampPalette(c(mid, midhi, hi), space = space) grad <- c(lo2mid(midpoint), mid2hi(n - midpoint + 1)[-1]) } else { grad <- colorRampPalette(c(lo, hi), space = space)(n) } if (cb_n != n) { cb_n <- as.integer(cb_n) cb_midpoint <- ceiling(cb_n / 2) # if (is.null(mid)) mid <- color_op(c(lo, hi), "mean") # lo2mid <- grDevices::colorRampPalette(c(lo, lomid, mid), space = space) # mid2hi <- grDevices::colorRampPalette(c(mid, midhi, hi), space = space) if (!is.na(mid)) { cb_grad <- c(lo2mid(cb_midpoint), mid2hi(cb_n - cb_midpoint + 1)[-1]) } else { cb_grad <- colorRampPalette(c(lo, hi), space = space)(cb_n) } } else { cb_grad <- grad cb_midpoint <- midpoint } # Preview ---- if (preview) { plot( rep(1, n), col = grad, pch = 19, cex = 6, xlim = c(0.5, n + .5), ylim = c(.8, 1.2), ann = FALSE, axes = FALSE ) text( x = 0.25, y = 1.05, labels = paste0("Color gradient (n = ", n, ")"), adj = 0, cex = 1.5 ) segments(midpoint, .95, midpoint, 1.05, lwd = 2, lty = 2, col = NA) } # Plotly cb ---- if (plotlycb) { requireNamespace("plotly") m <- list( size = 40, color = grad, opacity = 1, symbol = "circle" ) x.ax <- list( title = "", zeroline = FALSE, showline = FALSE, showticklabels = FALSE, showgrid = FALSE, range = c(0.8, 1.4) ) y.ax <- list( title = "", zeroline = FALSE, showline = FALSE, showticklabels = FALSE, showgrid = FALSE ) t <- list( family = "Open Sans", size = 22, color = plotly::toRGB("black") ) a <- list() for (i in 1:3) { a[[i]] <- list( x = 1.3, y = c(1, midpoint, n)[i], text = as.character(c(bar_min, bar_mid, bar_max))[i], xref = "x", yref = "y", showarrow = FALSE ) } hovtext <- ddSci(seq(bar_min, bar_max, (bar_max - bar_min) / (n - 1))) margin <- list( b = margins[1], l = margins[2], t = margins[3], r = margins[4], pad = pad ) p <- plotly::plot_ly( x = rep(1, n), y = 1:n, type = "scatter", mode = "markers", marker = m, hoverinfo = "text", text = hovtext ) |> plotly::layout( xaxis = x.ax, yaxis = y.ax, width = plotly_width, height = plotly_height, annotations = a, font = t, margin = margin ) |> plotly::config(displayModeBar = FALSE) if (plotlycb && !return_plotly) print(p) } # out ---- if (return_plotly) { return(p) } invisible(grad) } # /rtemis::colorgrad # 3-letter Color Name Abbreviations # wht white # blk black # red # grn green # blu blue # yel yellow # rng orange # prl purple colorvec <- function(cols) { if (nchar(cols) %% 3 != 0) { cli::cli_abort( "All colors must be specified by their 3-letter abbreviations" ) } cols <- tolower(cols) ncols <- nchar(cols) / 3 cols <- lapply(seq(ncols), function(i) substr(cols, i * 3 - 2, i * 3)) coldf <- data.frame( abbr = c("wht", "red", "grn", "blu", "blk", "yel", "rng", "prl"), name = c( "white", "red", "green", "blue", "black", "yellow", "orange", "purple" ), stringsAsFactors = FALSE ) cols <- sapply(1:ncols, function(i) coldf[coldf[["abbr"]] == cols[i], 2]) lo <- lomid <- mid <- midhi <- hi <- NULL collist <- list( twocols = c("lo", "hi"), threecols = c("lo", "mid", "hi"), fourcols = c("lo", "mid", "midhi", "hi"), fivecols = c("lo", "lomid", "mid", "midhi", "hi") ) for (i in seq(ncols)) { assign(collist[[ncols - 1]][i], cols[i]) } list(lo = lo, lomid = lomid, mid = mid, midhi = midhi, hi = hi) } autoalpha <- function(x, gamma = .0008, min = .3) { max(min, 1 - x * gamma) } ================================================ FILE: R/utils_data.R ================================================ # utils_data.R # ::rtemis:: # EDG rtemis.org # %% Public ---------------------------------------------------------------------------------------- #' Describe factor #' #' Outputs a single character with names and counts of each level of the input factor. #' #' @param x factor. #' @param ... See details. #' #' @details #' Extra arguments: #' - `max_n`: Integer: Return counts for up to this many levels. #' - `return_ordered`: Logical: If TRUE, return levels ordered by count, otherwise return in level order. #' - `verbosity`: Integer: Verbosity level. #' #' @return Character with level counts. #' #' @author EDG #' @noRd #' #' @examples #' # Small number of levels #' describe(iris[["Species"]]) #' #' # Large number of levels: show top n by count #' x <- factor(sample(letters, 1000, TRUE)) #' describe(x) #' describe(x, 3) #' describe(x, 3, return_ordered = FALSE) method(describe, class_factor) <- function( x, max_n = 5, return_ordered = TRUE, verbosity = 1L ) { x <- factor(x) x_levels <- levels(x) n_unique <- length(x_levels) x_freqs <- as.integer(table(x)) if (return_ordered) { idi <- order(x_freqs, decreasing = TRUE) } if (n_unique <= max_n) { if (return_ordered) { out <- paste(x_levels[idi], x_freqs[idi], sep = ": ", collapse = "; ") } else { out <- paste(x_levels, x_freqs, sep = ": ", collapse = "; ") } } else { idi <- order(x_freqs, decreasing = TRUE) if (return_ordered) { idi <- idi[seq_len(max_n)] out <- paste0( "(Top ", max_n, " of ", n_unique, ") ", paste(x_levels[idi], x_freqs[idi], sep = ": ", collapse = "; ") ) } else { idx <- seq_len(max_n) out <- paste0( "(First ", max_n, " of ", n_unique, ") ", paste(x_levels[idx], x_freqs[idx], sep = ": ", collapse = "; ") ) } } if (verbosity > 0L) { print(out) } invisible(out) } # /rtemis::describe.factor #' Match cases by covariates #' #' Find one or more cases from a `pool` data.frame that match cases in a target #' data.frame. Match exactly and/or by distance (sum of squared distances). #' #' @param target data.frame you are matching against. #' @param pool data.frame you are looking for matches from. #' @param n_matches Integer: Number of matches to return. #' @param target_id Character: Column name in `target` that holds unique #' cases IDs. Default = NULL, in which case integer case numbers will be used. #' @param pool_id Character: Same as `target_id` for `pool`. #' @param exactmatch_factors Logical: If TRUE, selected cases will have to #' exactly match factors available in `target`. #' @param exactmatch_cols Character: Names of columns that should be matched #' exactly. #' @param distmatch_cols Character: Names of columns that should be #' distance-matched. #' @param norepeats Logical: If TRUE, cases in `pool` can only be chosen #' once. #' @param ignore_na Logical: If TRUE, ignore NA values during exact matching. #' @param verbosity Integer: Verbosity level. #' #' @return data.frame #' #' @author EDG #' @export #' #' @examples #' set.seed(2021) #' cases <- data.frame( #' PID = paste0("PID", seq(4)), #' Sex = factor(c(1, 1, 0, 0)), #' Handedness = factor(c(1, 1, 0, 1)), #' Age = c(21, 27, 39, 24), #' Var = c(.7, .8, .9, .6), #' Varx = rnorm(4) #' ) #' controls <- data.frame( #' CID = paste0("CID", seq(50)), #' Sex = factor(sample(c(0, 1), 50, TRUE)), #' Handedness = factor(sample(c(0, 1), 50, TRUE, c(.1, .9))), #' Age = sample(16:42, 50, TRUE), #' Var = rnorm(50), #' Vary = rnorm(50) #' ) #' #' mc <- matchcases(cases, controls, 2, "PID", "CID") matchcases <- function( target, pool, n_matches = 1, target_id = NULL, pool_id = NULL, exactmatch_factors = TRUE, exactmatch_cols = NULL, distmatch_cols = NULL, norepeats = TRUE, ignore_na = FALSE, verbosity = 1L ) { ntarget <- nrow(target) npool <- nrow(pool) # Get IDs if (is.null(target_id)) { targetID <- seq(ntarget) } else { targetID <- target[, target_id] target[, target_id] <- NULL } if (is.null(pool_id)) { poolID <- seq(npool) } else { poolID <- pool[, pool_id] pool[, pool_id] <- NULL } # exact- & dist-matched column names if (is.null(exactmatch_cols) && exactmatch_factors) { exactmatch_cols <- colnames(target)[sapply(target, is.factor)] } # Keep exactmatch_cols present in pool exactmatch_cols <- exactmatch_cols[exactmatch_cols %in% colnames(pool)] if (is.null(distmatch_cols)) { distmatch_cols <- colnames(target)[!colnames(target) %in% exactmatch_cols] } # Keep distmatch_cols present in pool distmatch_cols <- distmatch_cols[distmatch_cols %in% colnames(pool)] # Remove unused columns, if any .remove <- colnames(target)[ !colnames(target) %in% c(exactmatch_cols, distmatch_cols) ] target[, .remove] <- NULL .remove <- colnames(pool)[ !colnames(pool) %in% c(exactmatch_cols, distmatch_cols) ] pool[, .remove] <- NULL # Convert all non-exact-matching to numeric tonumeric <- distmatch_cols[!sapply(target[, distmatch_cols], is.numeric)] if (length(tonumeric) > 0) { target[, tonumeric] <- lapply(target[, tonumeric, drop = FALSE], as.numeric) } tonumeric <- distmatch_cols[!sapply(pool[, distmatch_cols], is.numeric)] if (length(tonumeric) > 0) { pool[, tonumeric] <- lapply(pool[, tonumeric, drop = FALSE], as.numeric) } # Normalize all vcat <- rbind(target, pool) vcat[, distmatch_cols] <- lapply(vcat[, distmatch_cols, drop = FALSE], scale) target_s <- cbind(targetID = targetID, vcat[seq(ntarget), ]) pool_s <- cbind(poolID = poolID, vcat[-seq(ntarget), ]) rm(vcat) # For each target, select matches on categoricals, # then order pool by distance. mc <- data.frame(targetID = targetID, match = matrix(NA, ntarget, n_matches)) for (i in seq(ntarget)) { if (verbosity > 0L) { msg("Working on case", i, "of", ntarget) } if (is.null(exactmatch_cols)) { subpool <- pool_s } else { ind <- sapply(seq_len(nrow(pool_s)), function(j) { all( target_s[i, exactmatch_cols] == pool_s[j, exactmatch_cols], na.rm = ignore_na ) }) subpool <- pool_s[ind, , drop = FALSE] } distord <- order(sapply( seq_len(nrow(subpool)), function(j) { mse( unlist(target_s[i, distmatch_cols]), unlist(subpool[j, distmatch_cols]), na.rm = ignore_na ) } )) n_matched <- min(n_matches, nrow(subpool)) mc[i, 2:(n_matched + 1)] <- subpool[, 1][distord[seq(n_matched)]] if (norepeats) { pool_s <- pool_s[!pool_s[, 1] %in% mc[i, 2:(n_matches + 1)], ] } } mc } # /rtemis::matchcases #' Index columns by attribute name & value #' #' @param x tabular data. #' @param name Character: Name of attribute. #' @param value Character: Value of attribute. #' @param exact Logical: Passed to `attr` when retrieving attribute value. If `TRUE`, attribute #' name must match `name` exactly, otherwise, partial match is allowed. #' #' @return Integer vector. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' id = 1:5, #' sbp = rnorm(5, 120, 15), #' dbp = rnorm(5, 80, 10), #' paO2 = rnorm(5, 90, 10), #' paCO2 = rnorm(5, 40, 5) #' ) #' setattr(x[["sbp"]], "source", "outpatient") #' setattr(x[["dbp"]], "source", "outpatient") #' setattr(x[["paO2"]], "source", "icu") #' setattr(x[["paCO2"]], "source", "icu") #' index_col_by_attr(x, "source", "icu") index_col_by_attr <- function(x, name, value, exact = TRUE) { colattr <- lapply(x, \(i) attr(i, name, exact = exact)) # Convert to character vector maintaining NULL values (where attribute is not set) colattr <- sapply(colattr, function(i) { if (is.null(i)) NA_character_ else as.character(i) }) which(colattr == value) } # /rtemis.utils::index_col_by_attr #' Tabulate column attributes #' #' @param x tabular data: Input data set. #' @param attr Character: Attribute to get #' @param useNA Character: Passed to `table` #' #' @return table. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' id = 1:5, #' sbp = rnorm(5, 120, 15), #' dbp = rnorm(5, 80, 10), #' paO2 = rnorm(5, 90, 10), #' paCO2 = rnorm(5, 40, 5) #' ) #' setattr(x[["sbp"]], "source", "outpatient") #' setattr(x[["dbp"]], "source", "outpatient") #' setattr(x[["paO2"]], "source", "icu") #' setattr(x[["paCO2"]], "source", "icu") #' table_column_attr(x, "source") table_column_attr <- function(x, attr = "source", useNA = "always") { attrs <- sapply(x, \(i) { if (is.null(attr(i, attr, exact = TRUE))) { NA_character_ } else { attr(i, attr, exact = TRUE) } }) table(attrs, useNA = useNA) } # /rtemis::table_column_attr #' List column names by class #' #' @param x tabular data. #' @param sorted Logical: If TRUE, sort the output #' @param item_format Function: Function to format each item #' @param maxlength Integer: Maximum number of items to print #' #' @return `NULL`, invisibly. #' #' @author EDG #' @export #' #' @examples #' names_by_class(iris) names_by_class <- function( x, sorted = TRUE, item_format = highlight, maxlength = 24 ) { classes <- sapply(x, class) vals <- unique(classes) out <- if (sorted) { sapply(vals, \(i) sort(names(x)[classes == i])) } else { sapply(vals, \(i) names(x)[classes == i]) } cat(repr_ls(out, item_format = item_format, maxlength = maxlength)) invisible() } # /rtemis::names_by_class #' Inspect character and factor vector #' #' Checks character or factor vector to determine whether it might be best to convert to #' numeric. #' #' @details #' All data can be represented as a character string. A numeric variable may be read as #' a character variable if there are non-numeric characters in the data. #' It is important to be able to automatically detect such variables and convert them, #' which would mean introducing NA values. #' #' @param x Character or factor vector. #' @param xname Character: Name of input vector `x`. #' @param verbosity Integer: Verbosity level. #' @param thresh Numeric: Threshold for determining whether to convert to numeric. #' @param na.omit Logical: If TRUE, remove NA values before checking. #' #' @return Character. #' #' @author EDG #' @export #' #' @examples #' x <- c("3", "5", "undefined", "21", "4", NA) #' inspect_type(x) #' z <- c("mango", "banana", "tangerine", NA) #' inspect_type(z) inspect_type <- function( x, xname = NULL, verbosity = 1L, thresh = .5, na.omit = TRUE ) { if (is.null(xname)) { xname <- deparse(substitute(x)) } if (na.omit) { x <- na.omit(x) } xclass <- class(x)[1] xlen <- length(x) raw_na <- sum(is.na(x)) n_non_na <- xlen - raw_na # char_na <- sum(is.na(as.character(x))) suppressWarnings({ num_na <- if (xclass == "character") { sum(is.na(as.numeric(x))) } else { sum(is.na(as.numeric(as.character(x)))) } }) if (raw_na == xlen) { "NA" } else if ( xclass %in% c("character", "factor") && (num_na / n_non_na) < thresh ) { if (verbosity > 0L) { msg0( "Possible type error: ", highlight(xname), " is a ", bold(xclass), ", but perhaps should be ", bold("numeric"), "." ) } "numeric" } else { xclass } } # /rtemis::inspect_type ================================================ FILE: R/utils_data.table.R ================================================ # utils_data.table.R # ::rtemis:: # 2022- EDG rtemis.org #' Number of unique values per feature #' #' @param x data.table: Input data.table. #' @param excludeNA Logical: If TRUE, exclude NA values. #' @param limit Integer: Print up to this many features. Set to -1L to print all. #' @param verbosity Integer: If > 0, print output to console. #' #' @return Named integer vector of length `NCOL(x)` with number of unique values per column/feature, invisibly. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' ir <- as.data.table(iris) #' dt_nunique_perfeat(ir) dt_nunique_perfeat <- function( x, excludeNA = FALSE, limit = 20L, verbosity = 1L ) { stopifnot(inherits(x, "data.table")) nupf <- sapply(x, \(i) data.table::uniqueN(i, na.rm = excludeNA)) if (verbosity > 0L) { printls(nupf, item_format = thin, limit = limit, print_class = FALSE) } invisible(nupf) } # /rtemis::dt_nunique_perfeat #' Long to wide key-value reshaping #' #' Reshape a long format `data.table` using key-value pairs with #' `data.table::dcast` #' #' @param x `data.table` object. #' @param id_name Character: Name of column in `x` that defines the IDs #' identifying individual rows. #' @param key_name Character: Name of column in `x` that holds the key. #' @param positive Numeric or Character: Used to fill id ~ key combination #' present in the long format input `x`. #' @param negative Numeric or Character: Used to fill id ~ key combination #' NOT present in the long format input `x`. #' @param xname Character: Name of `x` to be used in messages. #' @param verbosity Integer: Verbosity level. #' #' @return `data.table` in wide format. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' ID = rep(1:3, each = 2), #' Dx = c("A", "C", "B", "C", "D", "A") #' ) #' dt_keybin_reshape(x, id_name = "ID", key_name = "Dx") dt_keybin_reshape <- function( x, id_name, key_name, positive = 1, negative = 0, xname = NULL, verbosity = 1L ) { if (is.null(xname)) { xname <- deparse(substitute(x)) } stopifnot(inherits(x, "data.table")) x <- copy(x) # Assign positive value to all in long form value_name <- "Bin__" x[, (value_name) := positive] .formula <- as.formula(paste( paste(id_name, collapse = " + "), "~", key_name )) if (verbosity > 0L) { msg("Reshaping", highlight(xname), "to wide format...") catsize(x, "Input size") } # Reshape to wide, filling all absent with negative value x <- dcast( x, .formula, fun.aggregate = length, value.var = value_name, drop = FALSE, fill = negative ) if (verbosity > 0L) { catsize(x, "Output size") } x } # /rtemis::dt_keybin_reshape #' Merge data.tables #' #' @param left data.table #' @param right data.table #' @param on Character: Name of column to join on. #' @param left_on Character: Name of column on left table. #' @param right_on Character: Name of column on right table. #' @param how Character: Type of join: "inner", "left", "right", "outer". #' @param left_name Character: Name of left table. #' @param right_name Character: Name of right table. #' @param left_suffix Character: If provided, add this suffix to all left column names, #' excluding on/left_on. #' @param right_suffix Character: If provided, add this suffix to all right column names, #' excluding on/right_on. #' @param verbosity Integer: Verbosity level. #' @param ... Additional arguments to be passed to `data.table::merge`. #' #' @return Merged data.table. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' xleft <- data.table(ID = 1:5, Alpha = letters[1:5]) #' xright <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6]) #' xlr_inner <- dt_merge(xleft, xright, on = "ID", how = "inner") dt_merge <- function( left, right, on = NULL, left_on = NULL, right_on = NULL, how = "left", left_name = NULL, right_name = NULL, left_suffix = NULL, right_suffix = NULL, verbosity = 1L, ... ) { if (is.null(left_name)) { left_name <- deparse(substitute(left)) } if (is.null(right_name)) { right_name <- deparse(substitute(right)) } if (is.null(left_on)) { left_on <- on } if (is.null(right_on)) { right_on <- on } if (verbosity > 0L) { icon <- switch( how, inner = "\u2A1D", left = "\u27D5", right = "\u27D6", "\u27D7" ) if (left_on == right_on) { msg0( bold(highlight(icon)), " Merging ", highlight(left_name), " & ", highlight(right_name), " on ", highlight(left_on), "..." ) } else { msg0( bold(highlight(icon)), " Merging ", highlight(left_name), " & ", highlight(right_name), " on ", highlight(left_on), " & ", highlight(right_on), "..." ) } catsize(left, left_name) catsize(right, right_name) } if (how == "left") { all.x <- TRUE all.y <- FALSE } else if (how == "right") { all.x <- FALSE all.y <- TRUE } else if (how == "inner") { all.x <- FALSE all.y <- FALSE } else { all.x <- all.y <- TRUE } if (!is.null(left_suffix)) { left_names <- setdiff(names(left), left_on) setnames(left, left_names, paste0(left_names, left_suffix)) } if (!is.null(right_suffix)) { right_names <- setdiff(names(right), right_on) setnames(right, right_names, paste0(right_names, right_suffix)) } dat <- merge( left, right, by.x = left_on, by.y = right_on, all.x = all.x, all.y = all.y, ... ) if (verbosity > 0L) { catsize(dat, "Merged") } dat } # /rtemis::dt_merge #' Clean factor levels of data.table ***in-place*** #' #' Finds all factors in a data.table and cleans factor levels to include #' only underscore symbols #' #' @param x data.table: Input data.table. Will be modified ***in-place***. #' @param prefix_digits Character: If not NA, add this prefix to all factor levels that #' are numbers #' #' @return Nothing, modifies `x` ***in-place***. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- as.data.table(iris) #' levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris") #' levels(x[["Species"]]) #' dt_set_cleanfactorlevels(x) #' levels(x[["Species"]]) dt_set_cleanfactorlevels <- function(x, prefix_digits = NA) { stopifnot(inherits(x, "data.table")) idi <- names(x)[sapply(x, is.factor)] for (i in idi) { x[, (i) := factor( x[[i]], labels = clean_names(levels(x[[i]]), prefix_digits = prefix_digits) ) ] } } # /rtemis::dt_set_cleanfactorlevels #' Get N and percent match of values between two columns of two data.tables #' #' @param x data.table: First input data.table. #' @param y data.table: Second input data.table. #' @param on Integer or character: column to read in `x` and `y`, if it is the #' same #' @param left_on Integer or character: column to read in `x` #' @param right_on Integer or character: column to read in `y` #' @param verbosity Integer: Verbosity level. #' #' @return list. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table(ID = 1:5, Alpha = letters[1:5]) #' y <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6]) #' dt_pctmatch(x, y, on = "ID") dt_pctmatch <- function( x, y, on = NULL, left_on = NULL, right_on = NULL, verbosity = 1L ) { if (is.null(left_on)) { left_on <- on } if (is.null(right_on)) { right_on <- on } xv <- unique(x[[left_on]]) n <- length(xv) yv <- unique(y[[right_on]]) nmatch <- sum(xv %in% yv) matchpct <- nmatch / n * 100 if (verbosity > 0L) { by_final <- paste(unique(c(left_on, right_on)), collapse = ", ") msg0( "Matched ", highlight(nmatch), "/", highlight(n), " on ", bold(by_final), " (", highlight(ddSci(matchpct)), "%)" ) } invisible(list(nmatch = nmatch, matchpct = matchpct)) } # /rtemis::dt_pctmatch #' Get percent of missing values from every column #' #' @param x data.frame or data.table #' @param verbosity Integer: Verbosity level. #' #' @return list #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table(a = c(1, 2, NA, 4), b = c(NA, NA, 3, 4), c = c("A", "B", "C", NA)) #' dt_pctmissing(x) dt_pctmissing <- function(x, verbosity = 1L) { nmissing <- sapply(x, \(i) sum(is.na(i))) pctmissing <- nmissing / NROW(x) if (verbosity > 0L) { cat("Percent missing per column:\n") printls(pctmissing, print_class = FALSE) } invisible(list(nmissing = nmissing, pctmissing = pctmissing)) } # /rtemis::dt_pctmissing #' Convert data.table logical columns to factors #' #' Convert data.table logical columns to factors with custom labels ***in-place*** #' #' @param x data.table: Input data.table. Will be modified ***in-place***. #' @param cols Optional Integer or character: columns to convert. If NULL, operates on all #' logical columns. #' @param labels Character: labels for factor levels. #' @param maintain_attributes Logical: If TRUE, maintain column attributes. #' @param fillNA Optional Character: If not NULL, fill NA values with this constant. #' #' @return data.table, invisibly. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table(a = 1:5, b = c(TRUE, FALSE, FALSE, FALSE, TRUE)) #' x #' dt_set_logical2factor(x) #' x #' z <- data.table( #' alpha = 1:5, #' beta = c(TRUE, FALSE, TRUE, NA, TRUE), #' gamma = c(FALSE, FALSE, TRUE, FALSE, NA) #' ) #' # You can usee fillNA to fill NA values with a constant #' dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"), fillNA = "No") #' z #' w <- data.table(mango = 1:5, banana = c(FALSE, FALSE, TRUE, TRUE, FALSE)) #' w #' dt_set_logical2factor(w, cols = 2, labels = c("Ugh", "Huh")) #' w #' # Column attributes are maintained by default: #' z <- data.table( #' alpha = 1:5, #' beta = c(TRUE, FALSE, TRUE, NA, TRUE), #' gamma = c(FALSE, FALSE, TRUE, FALSE, NA) #' ) #' for (i in seq_along(z)) setattr(z[[i]], "source", "Guava") #' str(z) #' dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes")) #' str(z) dt_set_logical2factor <- function( x, cols = NULL, labels = c("False", "True"), maintain_attributes = TRUE, fillNA = NULL ) { if (is.null(cols)) { cols <- names(x)[sapply(x, is.logical)] } for (i in cols) { if (maintain_attributes) { .attr <- attributes(x[[i]]) } x[, (i) := factor(x[[i]], levels = c(FALSE, TRUE), labels = labels)] if (!is.null(fillNA)) { x[is.na(x[[i]]), (i) := fillNA] } if (maintain_attributes) { for (j in seq_along(.attr)) { setattr(x[[i]], names(.attr)[j], .attr[[j]]) } } } invisible(x) } #' Inspect column types #' #' Will attempt to identify columns that should be numeric but are either character or #' factor by running [inspect_type] on each column. #' #' @param x data.table: Input data.table. #' @param cols Character vector: columns to inspect. #' @param verbosity Integer: Verbosity level. #' #' @return Character vector. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' id = 8001:8006, #' a = c("3", "5", "undefined", "21", "4", NA), #' b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), #' c = c(1, 2, 3, 4, 5, 6) #' ) #' dt_inspect_types(x) dt_inspect_types <- function(x, cols = NULL, verbosity = 1L) { if (is.null(cols)) { char_factor_idi <- which(sapply(x, is.character) | sapply(x, is.factor)) cols <- names(x[, .SD, .SDcols = char_factor_idi]) } current_types <- sapply(x[, .SD, .SDcols = cols], class) suggested_types <- sapply( cols, \(cn) inspect_type(x[[cn]], xname = cn, verbosity = verbosity) ) to_convert <- suggested_types != current_types names(to_convert)[to_convert] } #' Set column types automatically #' #' This function inspects a data.table and attempts to identify columns that should be #' numeric but have been read in as character, and fixes their type ***in-place***. #' This can happen when one or more fields contain non-numeric characters, for example. #' #' @param x data.table: Input data.table. Will be modified ***in-place***, if needed. #' @param cols Character vector: columns to work on. If not defined, will work on all #' columns #' @param verbosity Integer: Verbosity level. #' #' @return data.table, invisibly. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' id = 8001:8006, #' a = c("3", "5", "undefined", "21", "4", NA), #' b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), #' c = c(1, 2, 3, 4, 5, 6) #' ) #' str(x) #' # ***in-place*** operation means no assignment is needed #' dt_set_autotypes(x) #' str(x) #' #' # Try excluding column 'a' from autotyping #' x <- data.table( #' id = 8001:8006, #' a = c("3", "5", "undefined", "21", "4", NA), #' b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), #' c = c(1, 2, 3, 4, 5, 6) #' ) #' str(x) #' # exclude column 'a' from autotyping #' dt_set_autotypes(x, cols = setdiff(names(x), "a")) #' str(x) dt_set_autotypes <- function(x, cols = NULL, verbosity = 1L) { if (is.null(cols)) { cols <- names(x) } character_idx <- sapply(x[, .SD, .SDcols = cols], is.character) char_cols <- names(character_idx)[character_idx] for (i in char_cols) { if (inspect_type(x[[i]], i, verbosity = 0L) == "numeric") { if (verbosity > 0L) { msg("Converting", highlight(i), "to", bold("numeric")) } # This will generate warnings if there are non-numeric values suppressWarnings({ x[, (i) := as.numeric(x[[i]])] }) } } invisible(x) } # /rtemis::dt_set_autotypes #' List column names by attribute #' #' @param x data.table: Input data.table. #' @param attribute Character: name of attribute. #' @param exact Logical: If TRUE, use exact matching. #' @param sorted Logical: If TRUE, sort the output. #' #' @return Character vector. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- data.table( #' id = 1:5, #' sbp = rnorm(5, 120, 15), #' dbp = rnorm(5, 80, 10), #' paO2 = rnorm(5, 90, 10), #' paCO2 = rnorm(5, 40, 5) #' ) #' setattr(x[["id"]], "source", "demographics") #' setattr(x[["sbp"]], "source", "outpatient") #' setattr(x[["dbp"]], "source", "outpatient") #' setattr(x[["paO2"]], "source", "icu") #' setattr(x[["paCO2"]], "source", "icu") #' #' dt_names_by_attr(x, "source", "outpatient") dt_names_by_attr <- function(x, attribute, exact = TRUE, sorted = TRUE) { attrs <- unlist(lapply(x, \(i) attr(i, attribute))) attrs <- sapply(x, \(i) { .attr <- attr(i, attribute, exact = exact) if (is.null(.attr)) "NA" else .attr }) vals <- unique(attrs) if (sorted) { sapply(vals, \(i) sort(names(x)[attrs == i])) } else { sapply(vals, \(i) names(x)[attrs == i]) } } # /rtemis::dt_names_by_attr #' Clean column names and factor levels ***in-place*** #' #' @param x data.table: Input data.table. Will be modified ***in-place***, if needed. #' @param prefix_digits Character: prefix to add to names beginning with a #' digit. Set to NA to skip #' #' @return Nothing, modifies `x` ***in-place***. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' x <- as.data.table(iris) #' levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris") #' names(x) #' levels(x[["Species"]]) #' # ***in-place*** operation means no assignment is needed #' dt_set_clean_all(x) #' names(x) #' levels(x[["Species"]]) dt_set_clean_all <- function(x, prefix_digits = NA) { if (!is.data.table(x)) { cli::cli_abort("{.arg x} must be a data.table") } data.table::setnames(x, names(x), clean_colnames(x)) idi <- names(x)[sapply(x, is.factor)] for (i in idi) { x[, (i) := factor( x[[i]], labels = clean_names(levels(x[[i]]), prefix_digits = prefix_digits) ) ] } } # /rtemis::dt_set_clean_all #' Describe data.table #' #' @param x data.table: Input data.table. #' @param verbosity Integer: If > 0, print output to console. #' #' @return List with three data.tables: Numeric, Categorical, and Date. #' #' @author EDG #' @export #' #' @examples #' library(data.table) #' origin <- as.POSIXct("2022-01-01 00:00:00", tz = "America/Los_Angeles") #' x <- data.table( #' ID = paste0("ID", 1:10), #' V1 = rnorm(10), #' V2 = rnorm(10, 20, 3), #' V1_datetime = as.POSIXct( #' seq( #' 1, 1e7, #' length.out = 10 #' ), #' origin = origin #' ), #' V2_datetime = as.POSIXct( #' seq( #' 1, 1e7, #' length.out = 10 #' ), #' origin = origin #' ), #' C1 = sample(c("alpha", "beta", "gamma"), 10, TRUE), #' F1 = factor(sample(c("delta", "epsilon", "zeta"), 10, TRUE)) #' ) dt_describe <- function(x, verbosity = 1L) { if (!is.data.table(x)) { cli::cli_abort("{.arg x} must be a data.table") } nrows <- NROW(x) # appease R CMD check: do not use ..var in DT frame, use with = FALSE instead # Numeric index_nm <- which(sapply(x, is.numeric)) nm_summary <- if (length(index_nm) > 0) { data.frame( Variable = x[, index_nm, with = FALSE] |> names(), Min = sapply(x[, index_nm, with = FALSE], min, na.rm = TRUE), Max = sapply(x[, index_nm, with = FALSE], max, na.rm = TRUE), Median = sapply(x[, index_nm, with = FALSE], median, na.rm = TRUE), Mean = sapply(x[, index_nm, with = FALSE], mean, na.rm = TRUE), SD = sapply(x[, index_nm, with = FALSE], sd, na.rm = TRUE), Pct_missing = sapply( x[, index_nm, with = FALSE], \(col) sum(is.na(col)) / nrows ) ) } else { data.frame( Variable = character(), Min = numeric(), Max = numeric(), Median = numeric(), Mean = numeric(), SD = numeric(), Pct_missing = numeric() ) } # Characters & factors index_cf <- c(which(sapply(x, is.character)), which(sapply(x, is.factor))) cf_summary <- if (length(index_cf) > 0) { data.frame( Variable = x[, index_cf, with = FALSE] |> names(), N_unique = sapply( x[, index_cf, with = FALSE], \(col) length(unique(col)) ), Mode = sapply(x[, index_cf, with = FALSE], get_mode), Counts = sapply(x[, index_cf, with = FALSE], describe), Pct_missing = sapply( x[, index_cf, with = FALSE], \(col) sum(is.na(col)) / nrows ) ) } else { data.frame( Variable = numeric(), N_unique = integer(), Mode = character(), Counts = character(), Pct_missing = numeric() ) } # Dates index_dt <- which(sapply( x, \(col) any(class(col) %in% c("Date", "IDate", "POSIXct", "POSIXt")) )) dt_summary <- if (length(index_dt) > 0) { data.frame( Variable = x[, index_dt, with = FALSE] |> names(), Min = do.call(c, lapply(x[, index_dt, with = FALSE], min, na.rm = TRUE)), Max = do.call(c, lapply(x[, index_dt, with = FALSE], max, na.rm = TRUE)), Median = do.call( c, lapply(x[, index_dt, with = FALSE], median, na.rm = TRUE) ), Mean = do.call( c, lapply(x[, index_dt, with = FALSE], mean, na.rm = TRUE) ), Pct_missing = sapply( x[, index_dt, with = FALSE], \(col) sum(is.na(col)) / nrows ) ) } else { data.frame( Variable = character(), Min = numeric(), Max = numeric(), Median = numeric(), Mean = numeric(), Pct_missing = numeric() ) } out <- list( Numeric = nm_summary, Categorical = cf_summary, Date = dt_summary ) if (verbosity > 0L) { printls(out, print_df = TRUE) } invisible(out) } # /rtemis::dt_describe ================================================ FILE: R/utils_date.R ================================================ # utils_date.R # ::rtemis:: # 2024- EDG rtemis.org #' Extract features from dates #' #' @details weekday and month will be extracted as factors, year as integer #' #' @param dates Date vector. #' @param features Character vector: features to extract. #' @param drop_dates Logical: If TRUE, drop original date column. #' #' @return data.table with extracted features #' #' @author EDG #' @keywords internal #' @noRd dates2features <- function( dates, features = c("weekday", "month", "year"), drop_dates = TRUE ) { # appease R CMD check weekday <- NULL # to factors: dow, month dt <- data.table(dates = dates) if ("weekday" %in% features) { dt[, weekday := factor(weekdays(dates))] } if ("month" %in% features) { dt[, month := factor(months(dates))] } if ("year" %in% features) { dt[, year := year(dates)] } if (drop_dates) { dt[, dates := NULL] } dt } # /rtemis::dates2features #' Get holidays from date vector #' #' @param dates Date vector #' @param holidays Character vector: holidays to extract #' #' @return Factor of length `length(dates)` with levels "Not Holiday", "Holiday" #' #' @author EDG #' @keywords internal #' @noRd get_holidays <- function( dates, holidays = c("LaborDay", "NewYearsDay", "ChristmasDay") ) { # Get years from dates years <- unique(data.table::year(dates)) # Get all holidays in all years .holidays <- do.call( "c", lapply(years, function(year) { do.call( "c", lapply(holidays, function(holiday) { timeDate::as.Date.timeDate(timeDate::holiday( year = year, Holiday = holiday )) }) ) }) ) # Return intersection of dates and holidays holidays_fct <- factor( rep(0, length(dates)), levels = c(0, 1), labels = c("Not Holiday", "Holiday") ) holidays_fct[dates %in% .holidays] <- "Holiday" holidays_fct } # /rtemis::get_holidays #' Date to factor time bin #' #' Convert Date to time bin factor. #' #' Order of levels will be chronological (important e.g. for plotting) #' Additionally, can output ordered factor with `ordered = TRUE` #' #' @param x Date vector #' @param time_bin Character: "year", "quarter", "month", or "day" #' @param make_bins Character: "range" or "preseent". If "range" the factor levels will include all #' time periods define by `time_bin` within `bin_range`. This means factor levels can be #' empty. Otherwise, if "present", factor levels only include time periods present in data. #' @param bin_range Date, vector, length 2: Range of dates to make levels for. Defaults to range of #' input dates `x`. #' @param ordered Logical: If TRUE, factor output is ordered. #' #' @return factor of time periods #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' library(data.table) #' startDate <- as.Date("2018-01-01") #' endDate <- as.Date("2020-12-31") #' time <- sample(seq(startDate, endDate, length.out = 100)) #' date2factor(time) #' date2factor(time, "quarter") #' date2factor(time, "month") #' date2factor(time, "day") #' # range vs. present #' x <- sample(seq(as.Date("2018-01-01"), as.Date("2021-01-01"), by = 1), 10) #' date2factor(x, time_bin = "quarter", make_bins = "present") #' date2factor(x, time_bin = "quarter", make_bins = "range") date2factor <- function( x, time_bin = c("year", "quarter", "month", "day"), make_bins = c("range", "present"), bin_range = range(x, na.rm = TRUE), ordered = FALSE ) { time_bin <- match.arg(time_bin) make_bins <- match.arg(make_bins) if (time_bin == "year") { if (make_bins == "present") { factor(data.table::year(x), ordered = ordered) } else { out <- as.character(data.table::year(x)) factor( out, levels = as.character(seq( data.table::year(bin_range[1]), data.table::year(bin_range[2]) )), ordered = ordered ) } } else if (time_bin == "quarter") { if (make_bins == "present") { factor( paste0(data.table::year(x), " Q", data.table::quarter(x)), ordered = ordered ) } else { factor( paste0(data.table::year(x), " Q", data.table::quarter(x)), levels = levels(date2yq(seq( bin_range[1], bin_range[2], by = "quarter" ))), ordered = ordered ) } } else if (time_bin == "month") { ym <- paste(substr(months(x), 1, 3), data.table::year(x)) if (make_bins == "present") { .levels <- unique(ym[order(x)]) factor(ym, levels = .levels, ordered = ordered) } else { factor( ym, levels = levels(date2ym(seq(bin_range[1], bin_range[2], by = "month"))), ordered = ordered ) } } else if (time_bin == "day") { if (make_bins == "present") { factor(x, levels = as.character(unique(x[order(x)])), ordered = ordered) } else { factor( x, levels = as.character(seq(bin_range[1], bin_range[2], by = 1)), ordered = ordered ) } } } # /rtemis::date2factor #' Date to year-quarter factor #' #' @param x Date vector #' @param ordered Logical: If TRUE, return ordered factor. #' #' @author EDG #' @keywords internal #' @noRd date2yq <- function(x, ordered = FALSE) { factor( paste0(data.table::year(x), " Q", data.table::quarter(x)), ordered = ordered ) } # /rtemis::date2yq #' Date to year-month factor #' #' @param x Date vector #' @param ordered Logical: If TRUE, return ordered factor. #' #' @author EDG #' @keywords internal #' @noRd date2ym <- function(x, ordered = FALSE) { ym <- paste(substr(months(x), 1, 3), data.table::year(x)) .levels <- unique(ym[order(x)]) factor(ym, levels = .levels, ordered = ordered) } # /rtemis::date2ym ================================================ FILE: R/utils_df.R ================================================ # dataops # ::rtemis:: # 2021 EDG rtemis.org #' Get names by string matching or class #' #' @details #' For `getnames()` only: #' `pattern`, `starts_with`, and `ends_with` are applied sequentially. #' If more than one is provided, the result will be the intersection of all matches. #' #' #' @param x object with `names()` method. #' @param pattern Character: pattern to match anywhere in names of x. #' @param starts_with Character: pattern to match in the beginning of names of x. #' @param ends_with Character: pattern to match at the end of names of x. #' @param ignore_case Logical: If TRUE, well, ignore case. #' #' @return Character vector of matched names. #' #' @author EDG #' @export #' #' @examples #' getnames(iris, starts_with = "Sepal") #' getnames(iris, ends_with = "Width") #' getfactornames(iris) #' getnumericnames(iris) getnames <- function( x, pattern = NULL, starts_with = NULL, ends_with = NULL, ignore_case = TRUE ) { .names <- if (is.character(x)) { x } else { names(x) } # Apply filters sequentially if (!is.null(pattern)) { .names <- .names[grep(pattern, .names, ignore.case = ignore_case)] } if (!is.null(starts_with)) { .names <- .names[ grep(paste0("^", starts_with), .names, ignore.case = ignore_case) ] } if (!is.null(ends_with)) { .names <- .names[ grep(paste0(ends_with, "$"), .names, ignore.case = ignore_case) ] } .names } # /rtemis::getnames #' Get names by string matching multiple patterns #' #' @details #' `pattern`, `starts_with`, and `ends_with` are applied and the union of all matches is returned. #' `pattern` can be a character vector of multiple patterns to match. #' #' @param x Character vector or object with `names()` method. #' @param pattern Character vector: pattern(s) to match anywhere in names of x. #' @param starts_with Character: pattern to match in the beginning of names of x. #' @param ends_with Character: pattern to match at the end of names of x. #' @param ignore_case Logical: If TRUE, well, ignore case. #' @param return_index Logical: If TRUE, return integer index of matches instead of names. #' #' @return Character vector of matched names or integer index. #' #' @author EDG #' @export #' #' @examples #' mgetnames(iris, pattern = c("Sepal", "Petal")) #' mgetnames(iris, starts_with = "Sepal") #' mgetnames(iris, ends_with = "Width") mgetnames <- function( x, pattern = NULL, starts_with = NULL, ends_with = NULL, ignore_case = TRUE, return_index = FALSE ) { .names <- if (is.character(x)) x else names(x) idi <- numeric() if (!is.null(pattern)) { idi <- c( idi, unlist(lapply( pattern, function(p) grep(p, .names, ignore.case = ignore_case) )) ) } if (!is.null(starts_with)) { idi <- c(idi, which(startsWith(.names, starts_with))) } if (!is.null(ends_with)) { idi <- c(idi, which(endsWith(.names, ends_with))) } idi <- unique(idi) if (return_index) { idi } else { .names[idi] } } # Get factor/numeric/logical/character names from data.frame/data.table ---- # @param x data.frame or data.table (or data.frame-compatible object) # @return Character vector of column names of x with the specified class. #' #' @rdname getnames #' @export getfactornames <- function(x) names(x)[sapply(x, is.factor)] #' @rdname getnames #' @export getnumericnames <- function(x) names(x)[sapply(x, is.numeric)] #' @rdname getnames #' @export getlogicalnames <- function(x) names(x)[sapply(x, is.logical)] #' @rdname getnames #' @export getcharacternames <- function(x) names(x)[sapply(x, is.character)] #' @rdname getnames #' @export getdatenames <- function(x) { date_id <- sapply( x, \(v) class(v)[1] %in% c("Date", "IDate", "POSIXct", "POSIXlt") ) names(x)[date_id] } #' Get data.frame names and types #' #' @param x data.frame / data.table or similar #' @return character vector of column names with attribute "type" holding the class of each #' column #' #' @author EDG #' @export #' #' @examples #' getnamesandtypes(iris) getnamesandtypes <- function(x) { xnames <- names(x) attr(xnames, "type") <- sapply(x, class) xnames } # /rtemis::namesandtypes #' Unique values per feature #' #' Get number of unique values per features #' #' @param x matrix or data frame input #' @param excludeNA Logical: If TRUE, exclude NA values from unique count. #' #' @return Vector, integer of length `NCOL(x)` with number of unique #' values per column/feature #' #' @author EDG #' @export #' #' @examples #' df_nunique_perfeat(iris) df_nunique_perfeat <- function(x, excludeNA = FALSE) { if (excludeNA) { apply(x, 2, function(i) length(unique(na.exclude(i)))) } else { apply(x, 2, function(i) length(unique(i))) } } # /rtemis::df_nunique_perfeat #' Move data frame column #' #' @param x data.frame. #' @param colname Character: Name of column you want to move. #' @param to Integer: Which column position to move the vector to. #' Default = `ncol(x)` i.e. the last column. #' #' @return data.frame #' #' @author EDG #' @export #' #' @examples #' ir <- df_movecolumn(iris, colname = "Species", to = 1L) df_movecolumn <- function(x, colname, to = ncol(x)) { if (!is.data.frame(x)) { cli::cli_abort("Input {.arg x} must be a data frame.") } check_character(colname, allow_null = FALSE) to <- clean_int(to) if (NCOL(x) < 2) { cli::cli_abort("Input data.frame {.arg x} must have at least 2 columns.") } if (!(colname %in% names(x))) { cli::cli_abort("Column {.val {colname}} not found in input data frame.") } ncols <- ncol(x) if (to < 1L || to > ncols) { cli::cli_abort("{.arg to} must be between 1 and {.val {ncols}}.") } xnames <- setdiff(names(x), colname) x[, append(xnames, colname, after = to - 1L)] } # /rtemis::df_movecolumn #' Vector to data.frame #' #' Convert vector to 1-row data.frame, maintaining names if present #' #' @param x Vector. #' @param col_names Character: Name of the vector. #' #' @return data.frame. #' #' @author EDG #' @keywords internal #' @noRd vec2df <- function(x, col_names = NULL) { if (!is.vector(x)) { cli::cli_abort("Input must be a vector") } if (!is.null(col_names)) { names(x) <- col_names } as.data.frame(t(x)) } # /rtemis::vec2df ================================================ FILE: R/utils_exec.R ================================================ # utils_exec.Ranger # ::rtemis:: # 2025 EDG rtemis.org #' Do call with tryCatch and suggestion #' #' @param fn Function to call. #' @param args List of arguments to pass to function. #' @param error_pattern_suggestion Named list of the form pattern = "suggestion". If the pattern is #' found in the error message, the suggestion is appended to the error message. #' @param warning_pattern_suggestion Named list of the form pattern = "suggestion". If the pattern is #' #' @return Result of function call. #' #' @author EDG #' @keywords internal #' @noRd do_call <- function( fn, args, error_pattern_suggestion = NULL, warning_pattern_suggestion = NULL ) { call <- parent.frame(n = 1L) common_errors <- list( "object '(.*)' not found" = "Check that the object exists and is spelled correctly.", "object of type 'closure' is not subsettable" = "Check that the object is a list or data.frame." ) common_warnings <- list( "NAs introduced by coercion" = "Check that the input is of the correct type.", # "glm.fit: algorithm did not converge" = # "Same reasons as for 'glm.fit: fitted probabilities numerically 0 or 1 occurred'.", "glm.fit: fitted probabilities numerically 0 or 1 occurred" = paste( bold("Reasons for this warning include:"), "1) Perfect Separation of classes.", "2) Highly Imbalanced data.", "3) Extreme values in predictors.", "4) Too many predictors for the number of observations.", "5) Multicollinearity.", bold("Suggestion:"), "Try using GLMNET or tree-based algorithms", sep = "\n " ) ) err_pat_sug <- c(common_errors, error_pattern_suggestion) warn_pat_sug <- c(common_warnings, warning_pattern_suggestion) tryCatch( { withCallingHandlers( { do.call(fn, args) }, warning = function(w) { fnwarn <- conditionMessage(w) message("Warning caught: ", fnwarn) idi <- which(sapply( names(warn_pat_sug), function(i) grepl(i, fnwarn) )) if (length(idi) > 0) { for (i in idi) { cat(orange(warn_pat_sug[[i]], "\n")) } } invokeRestart("muffleWarning") } # /warning ) # /withCallingHandlers }, error = function(e) { fnerr <- e[["message"]] errmsg <- paste0(highlight(fn), " failed with error:\n", fnerr, "\n") idi <- which(sapply(names(err_pat_sug), function(i) grepl(i, fnerr))) if (length(idi) > 0) { suggestions <- sapply(idi, function(i) err_pat_sug[[i]]) errmsg <- paste0( red(errmsg), orange( paste0( bold("\nSuggestion:\n "), paste0(suggestions, collapse = "\n ") ) ) ) } cat("\n") cli::cli_abort(errmsg, call = call) } # /error ) # /tryCatch } # /rtemis::do_call ================================================ FILE: R/utils_files.R ================================================ # utils_files.R # ::rtemis:: # 2025 EDG rtemis.org #' Expand, normalize, concatenate, clean path #' #' @param ... Character: Parts of path to concatenate. #' @param expand_path Logical: If TRUE, expand concatenated path using [path.expand]. #' #' @return Character: Path. #' #' @author EDG #' @keywords internal #' @noRd make_path <- function(..., expand_path = TRUE) { path <- list(...) # Remove final "/" path <- lapply(path, \(x) gsub("\\/$", "", x)) # Concat path <- do.call(file.path, path) # Expand if (expand_path) { path <- path.expand(path) } path } # /rtemis::make_path ================================================ FILE: R/utils_html.R ================================================ # html_ops.R # ::rtemis:: # 2023- EDG rtemis.org #' @keywords internal #' @noRd html_highlight <- function(..., bold = TRUE) { if (bold) { span(..., style = "color: #16A0AC; font-weight: 700;") } else { span(..., style = "color: #16A0AC;") } } #' @keywords internal #' @noRd html_orange <- function(..., bold = TRUE) { if (bold) { span(..., style = "color: #FA6E1E; font-weight: 700;") } else { span(..., style = "color: #FA6E1E;") } } #' @keywords internal #' @noRd html_red <- function(..., bold = TRUE) { if (bold) { span(..., style = "color: #E61048; font-weight: 700;") } else { span(..., style = "color: #E61048;") } } #' @keywords internal #' @noRd html_success <- function(..., bold = TRUE) { if (bold) { span(..., style = "color: #32A03E; font-weight: 700;") } else { span(..., style = "color: #32A03E;") } } ================================================ FILE: R/utils_io.R ================================================ # utils_io.R # ::rtemis:: # 2022 EDG rtemis.org #' Write \pkg{rtemis} model to RDS file #' #' @param object `Supervised` object. #' @param outdir Path to output directory. #' @param file_prefix Character: Prefix for filename. #' @param verbosity Integer: Verbosity level. #' #' @author EDG #' @keywords internal #' @noRd rt_save <- function( object, outdir, file_prefix, print_load_info = TRUE, verbosity = 1L ) { # Message before expanding outdir to preserve privacy when using relative paths. if (verbosity > 0L) { start_time <- Sys.time() msg0( "Writing data to ", outdir, "...", caller = NA, newline = FALSE ) } outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any") if (!dir.exists(outdir)) { dir.create(outdir, recursive = TRUE, showWarnings = FALSE) } rds_path <- file.path(outdir, paste0(file_prefix, ".rds")) try(saveRDS(object, rds_path)) if (verbosity > 0L) { elapsed <- Sys.time() - start_time } if (file.exists(rds_path)) { if (verbosity > 0L) { yay(format(elapsed, digits = 2), gray(" [rt_save]"), sep = "") if (print_load_info) { msg0(gray( paste0( "Reload with: ", "> obj <- readRDS('", rds_path, "')" ) )) } } } else { if (verbosity > 0L) { nay( "Failed after ", format(elapsed, digits = 2), gray(" [rt_save]"), sep = "" ) } cli::cli_abort("Error: Saving model to ", outdir, " failed.") } } # /rtemis::rt_save #' Check file(s) exist #' #' @param paths Character vector of paths #' @param verbosity Integer: Verbosity level. #' @param pad Integer: Number of spaces to pad to the left #' #' @author EDG #' #' @keywords internal #' @noRd check_files <- function(paths, verbosity = 1L, pad = 0) { if (verbosity > 0L) { msg0("Checking ", singorplu(length(paths), "file"), ":") } for (f in paths) { if (file.exists(f)) { if (verbosity > 0L) { yay(f, pad = pad) } } else { if (verbosity > 0L) { nay(paste(f, red(" not found!")), pad = pad) } cli::cli_abort("File not found") } } } # /rtemis::check_files # %% sanitize_path ---- #' Sanitize and validate file paths for security #' #' Validates and normalizes file paths to prevent security vulnerabilities #' including command injection, path traversal, and unauthorized file access. #' #' @param path Character: File or directory path to sanitize. #' @param must_exist Logical: If TRUE, abort if path does not exist. Default = FALSE. #' @param allowed_base Character: Optional base directory to restrict paths to. If provided, #' the normalized path must be within this directory. Default = NULL (no restriction). #' @param allow_urls Logical: If TRUE, allow URL schemes (http://, https://, etc.). #' Default = FALSE. #' @param type Character: Expected path type - "file", "directory", or "any". Only checked #' if `must_exist = TRUE`. Default = "any". #' #' @return Character: Sanitized and normalized absolute path. #' #' @details #' Security checks performed: #' - Rejects paths starting with pipe character (prevents command injection in R readers) #' - Rejects paths containing null bytes #' - Rejects URL schemes unless `allow_urls = TRUE` #' - Normalizes path to absolute form #' - Optionally validates path exists and is correct type #' - Optionally validates path is within allowed base directory #' #' @author EDG #' @keywords internal #' @noRd sanitize_path <- function( path, must_exist = FALSE, allowed_base = NULL, allow_urls = FALSE, type = c("any", "file", "directory") ) { type <- match.arg(type) # Check for NULL or empty if (is.null(path) || length(path) == 0L || nchar(path) == 0L) { cli::cli_abort("Path cannot be NULL or empty.") } # Check for multiple paths if (length(path) > 1L) { cli::cli_abort("Function accepts a single path. Got {length(path)} paths.") } # Check for null bytes (check if raw bytes contain 0x00) if (any(charToRaw(path) == 0L)) { cli::cli_abort("Path contains null byte: {.file {path}}") } # Check for pipe character at start (command injection vector) if (grepl("^\\s*\\|", path)) { cli::cli_abort("Path cannot start with pipe character: {.file {path}}") } # Check for URL schemes unless explicitly allowed if (!allow_urls && grepl("^[a-zA-Z][a-zA-Z0-9+.-]*://", path)) { cli::cli_abort("URL schemes not allowed: {.file {path}}") } # Normalize to absolute path # mustWork = FALSE allows non-existent paths, will check separately if needed normalized_path <- normalizePath(path, winslash = "/", mustWork = FALSE) # Validate against allowed base directory if specified if (!is.null(allowed_base)) { allowed_base_norm <- normalizePath( allowed_base, winslash = "/", mustWork = TRUE ) # Check if normalized path starts with allowed base if (!startsWith(normalized_path, allowed_base_norm)) { cli::cli_abort( "Path {.file {path}} is outside allowed directory: {.file {allowed_base}}" ) } } # Check existence and type if required if (must_exist) { if (!file.exists(normalized_path)) { cli::cli_abort("Path does not exist: {.file {normalized_path}}") } if (type == "file" && dir.exists(normalized_path)) { cli::cli_abort("Path is not a file: {.file {normalized_path}}") } if (type == "directory" && !dir.exists(normalized_path)) { cli::cli_abort("Path is not a directory: {.file {normalized_path}}") } } normalized_path } # /rtemis::sanitize_path ================================================ FILE: R/utils_lightgbm.R ================================================ # utils_lightgbm.R # ::rtemis:: # 2023- EDG rtemis.org # %% prepare_lgb_data ---- #' Prepare data for LightGBM-based learners #' #' Shared data preparation for LightGBM, LightRF, and LightCART. #' Converts factors to 0-based integers, removes the outcome from #' `categorical_feature`, and creates `lgb.Dataset` objects. #' #' @param x tabular data: Training set (features + outcome in last column). #' @param dat_validation Optional tabular data: Validation set. #' @param type Character: "Classification" or "Regression". #' @param weights Optional numeric vector: Case weights for training data. #' @param verbosity Integer: Verbosity level. #' #' @return Named list with elements: #' - `train_data`: `lgb.Dataset` for training. #' - `valid_data`: `lgb.Dataset` for validation, or NULL. #' - `preprocessor`: `Preprocessor` object if factors were converted, or NULL. #' #' @author EDG #' @keywords internal #' @noRd prepare_lgb_data <- function( x, dat_validation = NULL, type, weights = NULL, verbosity = 1L ) { # Factor-to-integer preprocessing ---- factor_index <- names(x)[which(sapply(x, is.factor))] if (length(factor_index) > 0L) { prp <- preprocess( x, config = setup_Preprocessor( factor2integer = TRUE, factor2integer_startat0 = TRUE ), dat_validation = dat_validation, verbosity = verbosity ) if (is.null(dat_validation)) { x <- prp@preprocessed } else { x <- prp@preprocessed[["training"]] dat_validation <- prp@preprocessed[["validation"]] } } else { prp <- NULL } # Remove outcome from factor_index (outcome is last column). # For Classification, the outcome is a factor that was also converted; # it must not be listed as a categorical feature. if (type == "Classification" && length(factor_index) > 0L) { factor_index <- factor_index[seq_len(length(factor_index) - 1L)] } # Create lgb.Datasets ---- train_data <- lightgbm::lgb.Dataset( data = as.matrix(features(x)), categorical_feature = factor_index, label = outcome(x), weight = weights ) valid_data <- if (!is.null(dat_validation)) { lightgbm::lgb.Dataset( data = as.matrix(features(dat_validation)), categorical_feature = factor_index, label = outcome(dat_validation) ) } list( train_data = train_data, valid_data = valid_data, preprocessor = prp ) } # /rtemis::prepare_lgb_data #' Get LightGBM Booster Trees #' #' @return A list of trees #' #' @author EDG #' @keywords internal #' @noRd get_lgb_tree <- function(x, n_iter = -1) { out <- lapply( jsonlite::fromJSON( lightgbm::lgb.dump( booster = x, num_iteration = n_iter ), simplifyVector = FALSE )[["tree_info"]], \(y) y[["tree_structure"]] ) names(out) <- paste0("Tree_", seq_along(out)) out } # /rtemis::get_lgb_tree # preorderlgb ---- #' Preorder Traversal of LightGBM Tree #' #' Called by `lgbtree2rules` and operates on `tree` environment in place. #' #' @param tree Environment that will hold the extracted rules. #' @param node LightGBM tree. #' @param rule Character: current rule. #' @param left Character: left child label. #' @param right Character: right child label. #' @param split_feature Character: split feature label. #' @param threshold Character: threshold label. #' @param right_cat_type Character: "in" or "notin": operator for right categorical. #' @param xnames Character vector: feature names. #' @param factor_levels Named list of factor levels. #' @param verbosity Integer: Verbosity level. #' #' @return Character vector of rules. #' #' @author EDG #' @keywords internal #' @noRd preorderlgb <- function( tree, node, rule = "TRUE", left = "left_child", right = "right_child", split_feature = "split_feature", threshold = "threshold", right_cat_type = "in", xnames, factor_levels, verbosity = 0L ) { if (is.null(node[[split_feature]])) { names(rule) <- "leaf" if (verbosity > 0L) { message("Reached a leaf; rule is ", rule, ".") } tree[["leafs"]] <- c(tree[["leafs"]], rule) return(rule) } rule_left <- paste0( rule, " & ", xnames[node[[split_feature]] + 1], decision_left(node[["decision_type"]]), fmt_thresh( catsplit = node[["decision_type"]] == "==", feature = xnames[node[[split_feature]] + 1], threshold = node[["threshold"]], factor_levels = factor_levels ) ) rule_right <- paste0( rule, " & ", xnames[node[[split_feature]] + 1], decision_right(node[["decision_type"]], right_cat_type), fmt_thresh_right( catsplit = node[["decision_type"]] == "==", feature = xnames[node[[split_feature]] + 1], threshold = node[["threshold"]], factor_levels = factor_levels, cat_type = right_cat_type ) ) rule_left <- preorderlgb( tree, node[[left]], rule_left, left, right, split_feature, threshold, right_cat_type = right_cat_type, xnames = xnames, factor_levels = factor_levels, verbosity = verbosity ) rule <- c(rule, rule_left) rule_right <- preorderlgb( tree, node[[right]], rule_right, left, right, split_feature, threshold, right_cat_type = right_cat_type, xnames = xnames, factor_levels = factor_levels, verbosity = verbosity ) rule <- c(rule, rule_right) } # /rtemis::preorderlgb # lgbtree2rules ---- lgbtree2rules <- function(x, xnames, factor_levels, right_cat_type = "in") { tree <- new.env() tree[["leafs"]] <- character() preorderlgb( tree, x, xnames = xnames, right_cat_type = right_cat_type, factor_levels = factor_levels ) # remove root node "TRUE & " substr(tree[["leafs"]], 8, 99999) } # /rtemis::lgbtree2rules # lgb2rules ---- #' Convert LightGBM Booster to set of rules #' #' @param x LightGBM Booster object #' @param n_iter Integer: Number of trees to convert to rules #' @param xnames Character vector: Names of features #' #' @return Character vector of rules #' #' @author EDG #' @keywords internal #' @noRd lgb2rules <- function( Booster, n_iter = NULL, xnames, factor_levels, right_cat_type = "in", return_unique = TRUE, verbosity = 1L ) { if (verbosity > 0L) { msgstart("Extracting LightGBM rules...") } if (is.null(n_iter)) { n_iter <- length(Booster) } trees <- get_lgb_tree(Booster, n_iter) rules <- lapply(trees, function(x) { lgbtree2rules( x, xnames, factor_levels = factor_levels, right_cat_type = right_cat_type ) }) |> unlist() if (verbosity > 0L) { msgdone() } if (return_unique) unique(rules) else rules } # /rtemis::lgb2rules # extract_rules.lgb.Booster ---- #' author EDG #' @keywords internal #' @noRd method(extract_rules, class_lgb.Booster) <- function( x, n_iter = NULL, xnames, factor_levels, right_cat_type = "in", return_unique = TRUE, verbosity = 1L ) { if (verbosity > 0L) { msgstart("Extracting LightGBM rules...") } if (is.null(n_iter)) { n_iter <- length(x) } trees <- get_lgb_tree(x, n_iter) rules <- lapply(trees, function(x) { lgbtree2rules( x, xnames, factor_levels = factor_levels, right_cat_type = right_cat_type ) }) |> unlist() rules <- if (return_unique) { unique(rules) } else { rules } if (verbosity > 0L) { msgdone() msg0( "Extracted ", highlight(length(rules)), ifelse(return_unique, " unique", ""), " rules." ) } rules } # /rtemis::extract_rules.lgb.Booster # decision_left ---- #' @author EDG #' @keywords internal #' @noRd decision_left <- function(decision_type) { switch(decision_type, "<=" = " <= ", "==" = " %in% ") } # /rtemis::decision_left #' @author EDG #' @keywords internal #' @noRd decision_right <- function(decision_type, cat_type) { switch( decision_type, "<=" = " > ", "==" = if (cat_type == "in") " %in% " else " %notin% " ) } # /rtemis::decision_right #' Format rule thresholds #' #' @param catsplit Logical: If TRUE, feature is categorical #' @param feature Character: feature name #' @param threshold Character: threshold as reported by lightgbm #' @param factor_levels Named list of factor levels. Names should correspond to training #' set column names. #' #' @author EDG #' @keywords internal #' @noRd fmt_thresh <- function(catsplit, feature, threshold, factor_levels) { if (catsplit) { flevels <- as.integer(strsplit(threshold, "\\|\\|")[[1]]) + 1 # 0- to 1-based factor level index flevels <- factor_levels[[feature]][flevels] paste0( "c(", paste0("'", flevels, "'", collapse = ","), ")" ) } else { threshold } } # /rtemis::fmt_thresh #' @author EDG #' @keywords internal #' @noRd fmt_thresh_right <- function( catsplit, feature, threshold, factor_levels, cat_type ) { if (catsplit) { flevels <- as.integer(strsplit(threshold, "\\|\\|")[[1]]) + 1 # 0- to 1-based factor level index flevels <- factor_levels[[feature]][flevels] if (cat_type == "in") { flevels <- setdiff(factor_levels[[feature]], flevels) } paste0( "c(", paste0("'", flevels, "'", collapse = ","), ")" ) } else { threshold } } # /rtemis::fmt_thresh_right ================================================ FILE: R/utils_palettes.R ================================================ # palettes.R # ::rtemis:: # 2016- EDG rtemis.org # Colors ---- #' @keywords internal #' @noRd ucsfCol <- c( Navy = "#052049", A2 = "#0F388A", A3_CTA_Blue = "#006BE9", B3_Blue = "#178CCB", B5 = "#B8E6FA", B6 = "#E2F4FC", C1 = "#0E5258", C2 = "#14828C", C3_Teal = "#16A0AC", C4 = "#60D0DA", C5 = "#B4E2E8", D1 = "#00483A", D2 = "#007242", D3_Green = "#32A03E", E3_Chartreuse = "#84C234", E4_Point_Reyes = "#B4DC55", F1 = "#2E2872", F2 = "#443E8C", F3_Purple = "#6C62D0", F4_Yosemite = "#8A8CE3", F5 = "#C0C0EA", G1 = "#461850", G2 = "#6C247C", G3_Violet = "#A238BA", G4 = "#C45ED8", G5 = "#EACCF0", H1 = "#561038", H2 = "#821A56", H3_Magenta = "#C42882", H4 = "#E266AE", H5 = "#F2C2DE", I3_Blue_Gray = "#506380", I6 = "#F2F3F4", J2 = "#878D96", J3_Cool_Gray = "#B4B9BF", J5 = "#E1E3E5", K3_Gray = "#D1D3D3", L3_Yellow = "#FEB80A", M3_Orange = "#FA6E1E", N3_Red = "#E61048" ) # UC Davis ---- #' UC Davis Colors #' #' `ucdCol`: UC Davis color palette #' (https://marketingtoolbox.ucdavis.edu/visual-identity/color.html) #' @keywords internal #' @noRd ucdCol <- c( davisBlue = "#002855", davisGold = "#DAAA00", unitransRed = "#BA0C2F", westernRedbud = "#C6007E", californiaPoppy = "#ED8B00", goldenLupine = "#FFCD00", sunnyGrass = "#78BE20", skyBlue = "#00B5E2", recPoolBlue = "#008EAA", wineGrape = "#642667", muBrick = "#C26E60", hartHallStucco = "#E6A65D", sageGreen = "#9CAF88", evergreen = "#00573F", winterSkyGray = "#5B7F95", centennialWalkGray = "#B1B3B3", corkOak = "#ACA39A", southHallShingleBrown = "#4F2C1D" ) # Berkeley ---- #' Berkeley Colors #' #' `berkeleyCol`: Berkeley color palette #' (https://brand.berkeley.edu/colors/) #' #' @keywords internal #' @noRd berkeleyCol <- c( Berkeley_Blue = "#003262", Founders_Rock = "#3B7EA1", California_Gold = "#FDB515", Medalist = "#C4820E", Wellman_Tile = "#D9661F", Rose_Garden = "#EE1F60", Golden_Gate = "#ED4E33", South_Hall = "#6C3302", Bay_Fog = "#DDD5C7", Lawrence = "#00B0DA", LapLane = "#00A598", Pacific = "#46535E", Sather_Gate = "#B9D3B6", Ion = "#CFDD45", Soy_Bean = "#859438", Stone_Pine = "#584F29", Grey = "#EEEEEE", Web_Grey = "#888888" ) # UC Santa Cruz ---- #' UC Santa Cruz Colors #' #' `ucscCol`: UC Santa Cruz color palette #' (https://communications.ucsc.edu/visual-design/color/) #' #' @keywords internal #' @noRd ucscCol <- c( blue = "#003c6c", gold = "#fdc700", mediumBlue = "#006aad", lightBlue = "#13a5dc", teal = "#007988", orange = "#f29813", yellow = "#ffbf00", green = "#93c02d", rubineRed = "#da216d" ) # UC Merced ---- #' UC Merced Colors #' #' `ucmercedCol`: UC Merced color palette #' (https://publicrelations.ucmerced.edu/color-guidelines) #' #' @keywords internal #' @noRd ucmercedCol <- c( mercedRiverBlue = "#092f44", foothillsGold = "#a29061", sierraSkyBlue = "#5f8498", wildflowerBabyBlueEyes = "#2980b9", yosemiteSnowWhite = "#F8F5EC", halfDomeSlate = "#5B5B5B", mercedRyeGreen = "#235B16" ) # UC Santa Barbara ---- #' UC Santa Barbara Colors #' #' `ucsbCol`: UC Santa Barbara color palette #' (https://www.ucsb.edu/visual-identity/color) #' #' @keywords internal #' @noRd ucsbCol <- c( navy = "#003660", gold = "#FEBC11", aqua = "#04859B", moss = "#7A8D39", seaGreen = "#0BA89A", coral = "#EF5645", mist = "#9CBEBE", clay = "#DCD6CC", sandstone = "#C9BF9D", lightGray = "#DCE1E5" ) # UCLA ---- #' UCLA Colors #' #' `uclaCol`: UCLA color palette (http://brand.ucla.edu/identity/colors) #' #' @keywords internal #' @noRd uclaCol <- c( Blue = "#2774AE", Gold = "#FFD100", Darkest_Blue = "#003B5C", Darker_Blue = "#005587", Lighter_Blue = "#8BB8E8", Lightest_Blue = "#C3D7EE", Darkest_Gold = "#FFB81C", Darker_Gold = "#FFC72C", Yellow = "#FFFF00", Green = "#00FF87", Magenta = "#FF00A5", Cyan = "#00FFFF", Purple = "#8237FF" ) # UC Riverside ---- #' UC Riverside Colors #' #' `ucrCol`: UC Riverside color palette (https://brand.ucr.edu/ucr-colors) #' #' @keywords internal #' @noRd ucrColor <- c( ucrBlue = "#2d6cc0", ucrGold = "#f1ab00", ucrGray = "#393b41" ) # UCI ---- #' UCI Colors #' #' `uciCol`: UCI color palette (https://communications.uci.edu/campus-resources/graphic-standards/colors.php) #' #' @keywords internal #' @noRd uciCol <- c( blue = "#0064a4", yellow = "#ffd200", teal = "#6aa2b8", lightGray = "#c6beb5", navy = "#1b3d6d", orange = "#f78d2d", darkGray = "#555759", lightYellow = "#f7eb5f" ) # UC San Diego ---- #' UC San Diego Colors #' #' `ucsdCol`: UC San Diego color palette #' (https://ucpa.ucsd.edu/brand/elements/color-palette/) #' #' @keywords internal #' @noRd ucsdCol <- c( blue = "#182B49", mediumBlue = "#006A96", gold = "#C69214", yellow = "#FFCD00", cyan = "#00C6D7", green = "#6E963B", lightYellow = "#F3E500", orange = "#FC8900", coolGray = "#747678", lightGray = "#B6B1A9", darkGold = "#84754E" ) # University of California ---- #' University of California Colors #' #' `californiaCol`: University of California color palette #' (http://brand.universityofcalifornia.edu/guidelines/color.html#!primary-colors) #' #' @keywords internal #' @noRd ucCol <- c( ucBlue = "#1295D8", ucGold = "#FFB511", blue = "#005581", lightBlue = "#72CDF4", gold = "#FFD200", lightgold = "#FFE552", orange = "#FF6E1B", lightOrange = "#FF8F28", pink = "#E44C9A", lightPink = "#FEB2E0", teal = "#00778B", lightTeal = "#00A3AD", ucGray = "#7C7E7F", warmGray8 = "#8F8884", warmGray3 = "#BEB6AF", warmGray1 = "#DBD5CD", metallicGold = "#B4975A" ) # Stanford ---- #' Stanford Colors #' #' `stanfordCol`: Stanford color palette #' (https://identity.stanford.edu/color.html#digital-color) #' #' @keywords internal #' @noRd stanfordCol <- c( Cardinal = "#8c1515", Cool_Grey = "#4d4f53", Black = "#2e2d29", Bright_Red = "#B1040E", Chocolate = "#2F2424", Stone = "#544948", Fog = "#F4F4F4", Light_Sandstone = "#F9F6EF", Sandstone = "#d2c295", Warm_Grey = "#3f3c30", Beige = "#9d9573", Light_Sage = "#c7d1c5", Clay = "#5f574f", Cloud = "#dad7cb", Driftwood = "#b6b1a9", Sandhill = "#b3995d", Palo_Alto = "#175e54", Teal = "#00505c", Purple = "#53284f", Redwood = "#8d3c1e", Brown = "#5e3032", Sky = "#0098db", Lagunita = "#007c92", Mint = "#009b76", Gold = "#b26f16", Sun = "#eaab00", Poppy = "#e98300" ) # California State University ---- #' California State University Colors #' #' `csuCol`: California State University color palette #' (https://www2.calstate.edu/csu-system/csu-branding-standards/Documents/CSU-Brand-Guidelines-8-2018.pdf) #' #' @keywords internal #' @noRd csuCol <- c( red = "#CC0B2A", coolGray = "#D9D9D6", black = "#2F2F2F" ) # California Polytechnic State University ---- #' California Polytechnic State University Colors #' #' `calpolyCol`: Cal Poly color palette #' (https://universitymarketing.calpoly.edu/brand-guidelines/colors/) #' #' @keywords internal #' @noRd calpolyCol <- c( calpolygreen = "#154734", calpolygold = "#C69214", stadiumgold = "#F8E08E", polycanyon = "#F2C75C", dextergreen = "#A4D65E", farmersmarket = "#3A913F", skyblue = "#B5E3D8", surfblue = "#5CB8B2", serenity = "#D3E3F4", morroblue = "#ABCAE9", missionbeige = "#E4E1D1", pismosand = "#CAC7A7", coastsage = "#B6CCC2", sycamore = "#789F90", kennedygray = "#8E9089", sealgray = "#54585A", heritageorange = "#FF6A39", avodaco = "#D0DF00" ) # Caltech ---- #' Caltech Colors #' #' `caltechCol`: Caltech color palette (http://identity.caltech.edu/colors) #' #' @keywords internal #' @noRd caltechCol <- c( orange = "#FF6C0C", coolGray9 = "#76777B", coolGray3c = "#C8C8C8", pms414 = "#AAA99F", pms5497c = "#849895", pms7494c = "#9DAE88", pms451c = "#C7B784", pms7403c = "#F1D384", pms548c = "#003B4C", pms3292c = "#005851", pms668c = "#644B78", pms195c = "#7A303F", pms186c = "#CF0A2C", pms299c = "#00A1DF", pms7473c = "#1E988A", pms7489c = "#73A950", pms7408c = "#F9BE00", pms605c = "#E2CC00", pms1915c = "#F54D80" ) # Scripps Research ---- #' Scripps Research Colors #' #' `scrippsCol`: Scripps Research color palette #' #' @keywords internal #' @noRd scrippsCol <- c( yellow = "#edb035", orange = "#f1624f", maroon = "#610f37", blue = "#273d78", teal = "#116f79", lightblue = "#59c3d3" ) # Penn ---- #' rtemis Color Palettes #' #' `pennCol`: Penn color palette #' (http://www.upenn.edu/about/styleguide-color-type) #' #' @keywords internal #' @noRd pennCol <- c( darkestBlue = "#000f3a", darkerBlue = "#00144d", blue = "#01256e", lighterBlue = "#045ea7", lightestBlue = "#82afd3", darkestRed = "#57000a", darkerRed = "#74000e", red = "#95001a", lighterRed = "#c2004d", lightestRed = "#e180a6", darkestYellow = "#af7f00", darkerYellow = "#eaa900", yellow = "#f2c100", lighterYellow = "#f8de00", lightestYellow = "#fcef80", darkestGreen = "#005200", darkerGreen = "#006e00", green = "#008e00", lighterGreen = "#00be00", lightestGreen = "#80df80", darkestOrange = "#812d00", darkerOrange = "#ac3c00", orange = "#c35a00", lighterOrange = "#df9700", lightestOrange = "#efcb80", darkestPurple = "#23001f", darkerPurple = "#2f0029", purple = "#4a0042", lighterPurple = "#890082", lightestPurple = "#c480c1" ) #' `pennPalette`: Subset of `pennCol`. #' #' @keywords internal #' @noRd pennPalette <- pennCol[c( "lighterBlue", "red", "green", "yellow", "lighterPurple", "orange", "lightestBlue", "lighterRed", "lighterGreen", "lightestPurple", "lighterOrange" )] #' `pennLightPalette`: Subset of `pennCol`. This is the lighter Penn palette for use with #' the dark themes #' #' @keywords internal #' @noRd pennLightPalette <- pennCol[c( "lightestBlue", "lightestRed", "lightestGreen", "lightestYellow", "lightestPurple" )] # CMU ---- #' CMU Colors #' #' `cmuCol`: Carnegie Mellon color palette #' (https://www.cmu.edu/marcom/brand-standards/web-standards.html#colors) #' #' @keywords internal #' @noRd cmuCol <- c( cmuRed = "#bb0000", gray = "#e0e0e0", darkGray = "#666666", gold = "#aa6600", teal = "#006677", blue = "#224477", green = "#008855", darkGreen = "#224433" ) # MIT ---- #' MIT Colors #' #' `mitCol`: MIT color palette #' (http://web.mit.edu/graphicidentity/colors.html) #' #' @keywords internal #' @noRd mitCol <- c( red = "#A31F34", gray = "#8A8B8C", lightGray = "#C2C0BF" ) # Princeton ---- #' Princeton Colors #' #' `princetonCol`: Princeton color palette #' (https://communications.princeton.edu/guides-tools/logo-graphic-identity) #' #' @keywords internal #' @noRd princetonCol <- c( orangeOnWhite = "#e77500", orangeOnBlack = "#f58025" ) # Columbia ---- #' Columbia Colors #' #' `columbiaCol`: Columbia color palette #' (https://visualidentity.columbia.edu/content/web-0) #' #' @keywords internal #' @noRd columbiaCol <- c( blue = "#000d74", blue1 = "#C4D8E2", blue2 = "#75AADB", blue3 = "#6CADDF", blue4 = "#008EE0", blue5 = "#2C6BAC", blue6 = "#0046A6", white = "#F9F9F9", lightGray = "#EFEFEF", sandstone = "#D2D2C0", gray = "#555555", slate = "#41516C", tarawera = "#093552", yellow = "#FFB400", lightGreen = "#C0CD3F", lime = "#90C134", orange = "#C14D00", red = "#841C1C", purple = "#8E0F56" ) # Brown ---- #' Brown Colors #' #' `brownCol`: Brown color palette #' (https://www.brown.edu/university-identity/sites/university-identity/files/Brown_Visual_Identity_Policy_2016-07-22.pdf) #' #' @keywords internal #' @noRd brownCol <- c( red = "#ED1C24", brown = "#4E3629", gold = "#FFC72C", gray = "#98A4AE", skyBlue = "#59CBE8", emerald = "#00B398", navy = "#003C71", taupe = "#B7B09C" ) # Yale ---- #' Yale Colors #' #' `yaleCol`: Yale color palette (https://yaleidentity.yale.edu/web) #' #' @keywords internal #' @noRd yaleCol <- c( yaleBlue = "#00356b", mediumBlue = "#286dc0", lightBlue = "#63aaff", darkestGray = "#222222", darkGray = "#4a4a4a", sandstone = "#978d85", lightGray = "#dddddd", lightestGray = "#f9f9f9", green = "#5f712d", orange = "#bd5319" ) # Cornell ---- #' Cornell Colors #' #' `cornellCol`: Yale color palette #' (https://brand.cornell.edu/design-center/colors/ #' #' @keywords internal #' @noRd cornellCol <- c( carnellian = "#B31B1B", darkGrey = "#222222", lightGrey = "#F7F7F7", linkBlue = "#006699", greenGraphics = "#6EB43F", greenText = "#4B7B2B", greenLargeText = "#578E32", orangeGraphics = "#F8981D", orangeLargeText = "#D47500", redGraphics = "#EF4035", redText = "#DF1E12", navy = "#073949", darkWarmGrey = "#A2998B", seaGrey = "#9FAD9F" ) # Harvard Medical School ---- #' HMS Colors #' #' `hmsCol`: Harvard Medical School color palette #' (https://identityguide.hms.harvard.edu/color) #' #' @keywords internal #' @noRd hmsCol <- c( crimson = "#A51C30", black = "#1E1E1E", mortar = "#8C8179", parchment = "#F3F3F1", slate = "#8996A0", shade = "#BAC5C6", indigo = "#293352", blueBonnet = "#4E84C4", ivy = "#52854C", pear = "#C3D7A4", lemon = "#FFDB6D", saffron = "#D16103", gold = "#C4961A", creme = "#F4EDCA" ) # Dartmouth ---- #' Dartmouth Colors #' #' `dartmouthCol`: Dartmouth color palette #' (https://communications.dartmouth.edu/visual-identity/design-elements/color-palette#web%20palette) #' #' @keywords internal #' @noRd dartmouthCol <- c( dartmouthGreen = "#00693e", forestGreen = "#12312b", webGray1 = "#f7f7f7", webGray2 = "#e2e2e2", graniteGray = "#424141", autumnBrown = "#643c20", bonfireRed = "#9d162e", tuckOrange = "#e32d1c", summerYellow = "#f5dc69", springGreen = "#c4dd88", riverNavy = "#003c73", riverBlue = "#267aba", webViolet = "#8a6996", bonfireOrange = "#ffa00f" ) # USF ---- #' USF Colors #' #' `usfCol`: USF color palette #' (https://myusf.usfca.edu/marketing-communications/resources/graphics-resources/brand-standards/color-palette) #' Color conversions performed using https://www.pantone.com/color-finder/ #' #' @keywords internal #' @noRd usfCol <- c( green = "#205C40", yellow = "#ffb81c", gray = "#75787B" ) # Washington ---- #' University of Washington Colors #' #' `uwCol`: University of Washington color palette #' (http://www.washington.edu/brand/graphic-elements/primary-color-palette/) #' #' @keywords internal #' @noRd uwCol <- c( purple = "#4b2e83", gold = "#b7a57a", metallicGold = "#85754d" ) # Johns Hopkins ---- #' Johns Hopkins University Colors #' #' `jhuCol`: Johns Hopkins University color palette #' (https://brand.jhu.edu/color/) #' #' @keywords internal #' @noRd jhuCol <- c( heritageBlue = "#002d72", spiritBlue = "#68ace5", orange = "#cf4520", maroon = "#76232f", pink = "#a15a95", green = "#009b77", blue = "#0072ce", yellow = "#f1c400", pms7407c = "#cba052", pms1375c = "#ff9e1b", pms1505c = "#ff6900", pms7586c = "#9e5330", pms4625c = "#4f2c1d", pms486c = "#e8927c", pms187c = "#a6192e", pms262c = "#51284f", pms666c = "#a192b2", pms279c = "#418fde", pms564c = "#86c8bc", pms7734c = "#286140", pms7490c = "#719949" ) # NYU ---- #' NYU Colors #' #' `nyuCol`: NYU color palette #' (https://www.nyu.edu/employees/resources-and-services/media-and-communications/styleguide/website/graphic-visual-design.html) #' #' @keywords internal #' @noRd nyuCol <- c( brightPurple = "#8900e1", nyuPurple = "#57068c", darkerPurple = "#330662", darkestPurple = "#220337", mediumGray = "#6d6d6d", lightGray = "#b8b8b8", lighterGray = "#d6d6d6", lightestGray = "#f2f2f2", red = "#cb0200", orange = "#e86c00", green = "#489141", blue = "#28619e", lightBlue = "#3dbbdb", accentGreen = "#007c70", brightRed = "#d71e5e", brightOrange = "#e86c00", yellow = "#ffc107" ) # WashU ---- #' Washington University St Louis Colors #' #' `washuCol`: WashU color palette #' (https://marcomm.wustl.edu/resources/branding-logo-toolkit/color-palettes/) #' #' @keywords internal #' @noRd washuCol <- c( red = "#a51417", gray = "#6c7373", lightGray = "#c8c8c8", darkGray = "#3d3d3d", extraLightGRAY = "#eeeeee", green = "#007360", darkGreen = "#173e3a", lightGreen = "#789b4a", tan = "#e1c4ac", darkBlue = "#172752", blue = "#005f85", pearl = "#d8d2c5", yellow = "#ffcc00", orange = "#d15f27", darkOrange = "#b85323", purple = "#622466", lightTurqoise = "#67c8c7", turqoise = "#2b8282" ) # Chicago ---- #' U Chicago Colors #' #' `chicagoCol`: University of Chicago color palette #' (https://news.uchicago.edu/sites/default/files/attachments/_uchicago.identity.guidelines.pdf) #' #' @keywords internal #' @noRd chicagoCol <- c( maroon = "#800000", darkGray = "#767676", lightGray = "#D6D6CE", yellowTint = "#FFB547", yellowCore = "#FFA319", yellowShade = "#C68220", orangeTint = "#D49464", orangeCore = "#C16622", orangeShade = "#9A5324", redTint = "#B1746F", redCore = "#8F3931", redShade = "#642822", lightGreenTint = "#ADB17D", lightGreenCore = "#8A9045", lightGreenShade = "#616530", darkGreenTint = "#8A8B79", darkGreenCore = "#58593F", darkGreenShade = "#3E3E23", blueTint = "#5B8FA8", blueCore = "#155F83", blueShade = "#0F425C", violetTint = "#725663", violetCore = "#350E20", cyan = "#47B5FF", magenta = "#FF3399" ) # Penn State ---- #' Penn State Colors #' #' `texasCol`: Penn State color palette #' (https://brand.psu.edu/design-essentials.html#color) #' #' @keywords internal #' @noRd pennstateCol <- c( nittanyNavy = "#001E44", beaverBlue = "#1E407C", pennsylvaniaSky = "#009CDE", limestone = "#91959C", creek = "#3EA39E", slate = "#314D64", pennsForest = "#4A7729", oldCoaly = "#54585A", landGrant = "#6A3028", lionsRoar = "#BF8226", lionShrine = "#B88965", statelyAtherton = "#AC8DCE", pughBlue = "#96BEE6", original1887 = "#BC204B", brightkeystone = "#FFD100", inventOrange = "#E98300", dawnOfDiscovery = "#F2665E", perpetualWonder = "#491D70", greenOpportunity = "#008755", futuresCalling = "#99CC00" ) # SFSU ---- #' SF State #' #' `sfsuCol`: SF State color palette #' (https://logo.sfsu.edu/color-system) #' #' @keywords internal #' @noRd sfsuCol <- c( `2755C` = "#231161", `2755C_85pc` = "#463077", `117C` = "#C99700", `117C_60pc` = "#E9D597", `3025C` = "#004F71", `383C` = "#ABAD00", `7419C` = "#B04A5A", `484C` = "#9A3324", coolGray11 = "#53565A" ) # U Illinois ---- #' University of Illinois Colors #' #' `illinoisCol`: University of Illinois color palette #' (https://www.uillinois.edu/OUR/brand/color_palettes) #' #' @keywords internal #' @noRd illinoisCol <- c( uofiblue = "#13294b", urbanaOrange = "#E84A27", uicRed = "#D50032", uisBlue = "#003366", teal = "#0d605e", grayBlue = "#6fafc7", citron = "#bfd46d", darkYellow = "#ffd125", salmon = "#ee5e5e", periwinkle = "#4f6898", gray = "#E8E9EA", coolGray6 = "#A5A8AA", coolGray1 = "#5E6669", secondaryBlue1 = "#0455A4", secondaryBlue2 = "#1F4096" ) # U Maryland ---- #' University of Maryland Colors #' #' `umdCol`: University of Maryland color palette #' (https://osc.umd.edu/licensing-trademarks/brand-standards/logos/#color) #' #' @keywords internal #' @noRd umdCol <- c( umdRed = "#E21833", umdYellow = "#ffd200", umdBrown = "#AD7C59" ) # MSU ---- #' Michigan State University Colors #' #' `msuCol`: MSU color palette #' (https://brand.msu.edu/visual/color-palette) #' #' @keywords internal #' @noRd msuCol <- c( SpartanGreen = "#18453B", White = "#FFFFFF", Black = "#000000", KellyGreen = "#008208", LimeGreen = "#7BBD00", ExcellenceGreen = "#0B9A6D" ) # Michigan ---- #' University of Michigan Colors #' #' `michiganCol`: Michigan color palette #' (https://brand.umich.edu/design-resources/colors/) #' #' @keywords internal #' @noRd michiganCol <- c( Maize = "#FFCB05", Blue = "#00274C", TappanRed = "#9A3324", RossOrange = "#D86018", RackhamGreen = "#75988d", WaveFieldGreen = "#A5A508", TaubmanTeal = "#00B2A9", ArboretumBlue = "#2F65A7", A2Amethyst = "#702082", MatthaeiViolet = "#575294", UMMATan = "#CFC096", BurtonTowerBeige = "#9B9A6D", AngelHallAsh = "#989C97", LawQuadStone = "#655A52", PumaBlack = "#131516" ) # Iowa ---- #' Univeristy of Iowa Colors #' #' `iowaCol`: University of Iowa color palette #' (https://brand.uiowa.edu/color) #' #' @keywords internal #' @noRd iowaCol <- c( Gold = "#FFCD00", Gray = "#BBBCBC", Blue = "#00A9E0", Green = "#00AF66", Orange = "#FF8200", DarkGray = "#63666A", DarkBlue = "#00558C", DarkGreen = "#00664F", DarkOrange = "#BD472A" ) # U Texas ---- #' U Texas Colors #' #' `texasCol`: University of Texas color palette #' (https://brand.utexas.edu/identity/color/) #' #' @keywords internal #' @noRd texasCol <- c( burntOrange = "#bf5700", gray = "#333f48", brightOrange = "#f8971f", yellow = "#ffd600", lightGreen = "#a6cd57", green = "#579d42", teal = "#00a9b7", blue = "#005f86", lightBlue = "#9cadb7", stone = "#d6d2c4" ) # Emory ---- #' Emory Colors #' #' `emoryCol`: Emory color palette #' (https://brand.emory.edu/color.html) #' #' @keywords internal #' @noRd emoryCol <- c( emoryBlue = "#012169", darkBlue = "#0c2340", mediumBlue = "#0033a0", lightBlue = "#007dba", yellow = "#f2a900", gold = "#b58500", metallicGold = "#84754e", cyan = "#00aeef", skyBlue = "#41b6e6", teal = "#487f84", kellyGreen = "#348338", seaGreen = " #006c5b", olive = "#5c8118", orange = "#c35413", red = "#da291c", magenta = "#c6007e", purple = "#6558b1", grape = "#6d2077", black = "#101820", coolGray5 = "#b1b3b3", coolGray2 = "#d0d0ce", coolGray1 = "#d9d9d6" ) # Georgia Tech ---- #' Georgia Tech Colors #' #' `techCol`: Georgia Tech color palette #' (http://www.licensing.gatech.edu/super-block/239) #' #' @keywords internal #' @noRd techCol <- c( techGold = "#B3A369", buzzGold = "#EAAA00", blue = "#00263A" ) # Vanderbilt ---- #' Vanderbilt Color #' #' `vanderbiltCol`: Vanderbilt color palette #' (https://www.vanderbilt.edu/communications/brand/color.php) #' #' @keywords internal #' @noRd vanderbiltCol <- c( gold = "#D8AB4C", blue = "#006682", red = "#993D1B", darkGray = "#333333", green = "#464E21", lightBlue = "#CCE0E6", lightRed = "#EBD8D1", lightGray = "#DDDDDD", lightGreen = "#DADCD3" ) # Jefferson ---- #' Jefferson University Colors #' #' `jeffersonCol`: Jefferson color palette (http://creative.jefferson.edu/downloads/Jefferson-Brand-Guidelines.pdf) #' #' @keywords internal #' @noRd jeffersonCol <- c( jeffDeepBlue = "#152456", jeffBrightBlue = "#59B7df", legacyMaroon = "#9f2943", red = "#e53e30", voltGreen = "#ece819", silver = "#dfe1df", darkGray = "#8e9089", black = "#231f20" ) # Hawaii ---- #' University of Hawaii Colors #' #' `hawaiiCol`: University of Hawaii color palette (https://www.hawaii.edu/offices/eaur/graphicsstandards.pdf) #' #' @keywords internal #' @noRd hawaiiCol <- c( manoa = "#024731", hilo = "#DA291C", westOahu = "#A71930", hawaiiCC = "#91004B", honoluluCC = "#00747A", kapiolaniCC = "#002395", kauaiCC = "#716FB3", leeward = "#3D7EDB", mauiCC = "#005172", windward = "#7AB800", system = "#B3995D" ) # NIH ---- #' NIH Colors #' #' `nihCol`: NIH color palette (https://www.nlm.nih.gov/about/nlm_logo_guidelines_030414_508.pdf) #' #' @keywords internal #' @noRd nihCol <- c( blue = "#20558a", gray = "#616265" ) # Imperial ---- #' Imperial Colours #' #' `imperialCol`: Imperial College London colour palette #' (https://www.imperial.ac.uk/brand-style-guide/visual-identity/brand-colours/) #' #' @keywords internal #' @noRd imperialCol <- c( navy = "#002147", imperialBlue = "#003E74", lightGrey = "#EBEEEE", coolGrey = "#9D9D9D", lightBlue = "#D4EFFC", blue = "#006EAF", processBlue = "#0091D4", poolBlue = "#00ACD7", darkTeal = "#0F8291", teal = "#009CBC", seaglass = "#379f9f", darkGreen = "#02893B", kermitGreen = "#66A40A", lime = "#BBCE00", orange = "#D24000", tangerine = "#EC7300", lemonYellow = "#FFDD00", brick = "#A51900", red = "#DD2501", cherry = "#E40043", raspberry = "#9F004E", magentaPink = "#C81E78", iris = "#751E66", violet = "#960078", plum = "#321E6D", purple = "#653098" ) # UCL ---- #' UCL Colours #' #' `uclCol`: UCL colour palette (https://www.ucl.ac.uk/cam/brand/guidelines/colour) #' #' @keywords internal #' @noRd uclCol <- c( darkGreen = "#555025", darkRed = "#651D32", darkPurple = "#4B384C", darkBlue = "#003D4C", darkBrown = "#4E3629", midGreen = "#8F993E", midRed = "#93272C", midPurple = "#500778", midBlue = "#002855", stone = "#D6D2C4", brightGreen = "#B5BD00", brightRed = "#D50032", brightBlue = "#0097A9", brightPink = "#AC145A", lightGreen = "#BBC592", lightRed = "#E03C31", lightPurple = "#C6B0BC", lightBlue = "#8DB9CA", yellow = "#F6BE00", orange = "#EA7600", grey = "#8C8279", blueCeleste = "#A4DBE8", IOEblue = "#24509A" ) # Oxford ---- #' Oxford Colours #' #' `oxfordCol`: Oxford University colour palette (https://www.ox.ac.uk/sites/files/oxford/media_wysiwyg/Oxford%20Blue%20LR.pdf) #' #' @keywords internal #' @noRd oxfordCol <- c( oxfordBlue = "#002147", pantone279 = "#4891DC", pantone291 = "#9ECEEB", pantone5405 = "#44687D", pantone549 = "#5F9BAF", pantone551 = "#A1C4D0", pantone562 = "#007770", pantone624 = "#7BA296", pantone559 = "#BCD2C3", pantone576 = "#69913B", pantone578 = "#B9CF96", pantone580 = "#CEDBAF", pantone583 = "#AAB300", pantone585 = "#DBDE72", pantone587 = "#E3E597", pantone7412 = "#CF7A30", pantone129 = "#F5CF47", pantone127 = "#F3DE74", pantone202 = "#872434", pantone200 = "#BE0F34", pantone196 = "#EBC4CB", pantoneWarmGray6 = "#A79D96", pantoneWarmGray3 = "#C7C2BC", pantoneWarmGray1 = "#E0DED9" ) # NHS ---- #' NHS Colours #' #' `nhsCol`: NHS colour palette (https://www.england.nhs.uk/nhsidentity/identity-guidelines/colours/) #' #' @keywords internal #' @noRd nhsCol <- c( nhsDarkBlue = "#003087", nhsBlue = "#005EB8", nhsBrightBlue = "#0072CE", nhsLightBlue = "#41B6E6", nhsAquaBlue = "#00A9CE", nhsBlack = "#231f20", nhsDarkGrey = "#425563", nhsMidGrey = "#768692", nhsPaleGrey = "#E8EDEE", nhsDarkGreen = "#006747", nhsGreen = "#009639", nhsLightGreen = "#78BE20", nhsAquaGreen = "#00A499", nhsPurple = "#330072", nhsDarkPink = "#7C2855", nhsPink = "#AE2573", nhsDarkRed = "#8A1538", emergencyServicesRed = "#DA291C", nhsOrange = "#ED8B00", nhsWarmYellow = "#FFB81C", nhsYellow = "#FAE100" ) # UBC ---- #' UBC Colors #' #' `ubcCol`: UBC color palette (http://assets.brand.ubc.ca/downloads/ubc_colour_guide.pdf) #' #' @keywords internal #' @noRd ubcCol <- c( ubcBlue = "#002145", blue2 = "#0055B7", blue3 = "#00A7E1", blue4 = "#40B4E5", blue5 = "#6EC4E8", blue6 = "#97D4E9" ) # U Toronto ---- #' U Toronto Colors #' #' `torontoCol`: U Toronto color palette (https://trademarks.utoronto.ca/colors-fonts/) #' #' @keywords internal #' @noRd torontoCol <- c( blue = "#002043", red = "#bb133e" ) # McGill ---- #' McGill Colors #' #' `mcgillCol`: McGill color palette (https://www.mcgill.ca/visual-identity/visual-identity-guide) #' #' @keywords internal #' @noRd mcgillCol <- c( mcgillRed = "#ED1B2F", grey = "#5D6770", pastelOrange = "#FFD794", brightOrange = "#F7941D", mutedOrange = "#D3674A", darkOrange = "#AA4B31", pastelYellow = "#FFF193", brightYellow = "#FFD400", mutedYellow = "#E8B92E", darkYellow = "#B28C35", pastelTeal = "#B5E1E1", brightTeal = "#27BDBE", mutedTeal = "#087F8C", darkTeal = "#0A6266", pastelBlue = "#C8EAF5", brightBlue = "#44C8F5", mutedBlue = "#0096C9", darkBlue = "#024F6D", pastelGreen = "#D5E6A8", brightGreen = "#B2D235", mutedGreen = "#8BA04E", darkGreen = "#305534", pastelPink = "#E2A7CC", brightPink = "#C768A9", mutedPink = "#9B5678", darkPink = "#673567", darkRed = "#9E0918" ) # ETH ---- #' ETH Colours #' #' `ethCol`: ETH color palette (https://ethz.ch/services/en/service/communication/corporate-design/colour.html) #' #' @keywords internal #' @noRd ethCol <- c( eth1 = "#1F407A", eth2 = "#3C5A0F", eth3 = "#0069B4", eth4 = "#72791C", eth5 = "#91056A", eth6 = "#6F6F6E", eth7 = "#A8322D", eth8 = "#007A92", eth9 = "#956013", eth10 = "#82BE1E" ) # RWTH Aachen ---- #' RWTH Aachen Colours #' #' `rwthCol`: RWTH Aachen color palette (http://www9.rwth-aachen.de/global/show_document.asp?id=aaaaaaaaaadpbhq) #' #' @keywords internal #' @noRd rwthCol <- c( blau1 = "#00549F", blau2 = "#407FB7", blau3 = "#8EBAE5", blau4 = "#C7DDF2", blau5 = "#E8F1FA", magenta1 = "#E30066", magenta2 = "#E96088", magenta3 = "#F19EB1", magenta4 = "#F9D2DA", magenta5 = "#FDEEF0", gelb1 = "#FFED00", gelb2 = "#FFF055", gelb3 = "#FFF59B", gelb4 = "#FFFAD1", gelb5 = "#FFFDEE", petrol1 = "#006165", petrol2 = "#2D7F83", petrol3 = "#7DA4A7", petrol4 = "#BFD0D1", petrol5 = "#E6ECEC", tuerkis1 = "#0098A1", tuerkis2 = "#00B1B7", tuerkis3 = "#89CCCF", tuerkis4 = "#CAE7E7", tuerkis5 = "#EBF6F6", gruen1 = "#57AB27", gruen2 = "#8DC060", gruen3 = "#B8D698", gruen4 = "#DDEBCE", gruen5 = "#F2F7EC", maigruen1 = "#BDCD00", maigruen2 = "#D0D95C", maigruen3 = "#E0E69A", maigruen4 = "#F0F3D0", maigruen5 = "#F9FAED", orange1 = "#F6A800", orange2 = "#FABE50", orange3 = "#FDD48F", orange4 = "#FEEAC9", orange5 = "#FFF7EA", rot1 = "#CC071E", rot2 = "#D85C41", rot3 = "#E69679", rot4 = "#F3CDBB", rot5 = "#FAEBE3", bordeaux1 = "#A11035", bordeaux2 = "#B65256", bordeaux3 = "#CD8B87", bordeaux4 = "#E5C5C0", bordeaux5 = "#F5E8E5", violett1 = "#612158", violett2 = "#834E75", violett3 = "#A8859E", violett4 = "#D2C0CD", violett5 = "#EDE5EA", lila1 = "#7A6FAC", lila2 = "#9B91C1", lila3 = "#BCB5D7", lila4 = "#DEDAEB", lila5 = "#F2F0F7" ) # Mozilla ---- #' Mozilla Colors #' #' `mozillaCol`: Mozilla design colors #' (https://mozilla.design/mozilla/color/) #' #' @keywords internal #' @noRd mozillaCol <- c( neonBlue = "#00ffff", lemonYellow = "#fff44f", warmRed = "#ff4f5e", neonGreen = "#54ffbd", darkPurple = "#6e008b", darkGreen = "#005e5e", darkBlue = "#00458b", lightGrey = "#e7e5e2" ) # Firefox ---- #' Firefox Colors #' #' `firefoxCol`: Firefox design colors #' (https://mozilla.design/firefox/color/) #' #' @keywords internal #' @noRd firefoxCol <- c( Green = "#53FEBE", Blue = "#0290EE", Purple = "#AC71FF", Light_Purple = "#D64CF1", Magenta = "#FE4AA3", Salmon = "#FF6A75", Orange = "#FE8A4F", Yellow = "#FFBD4F" ) # Apple ---- #' Apple Colors #' #' `appleCol`: Apple Human Interface Guidelines color palette #' (https://developer.apple.com/design/human-interface-guidelines/ios/visual-design/color/) #' #' @keywords internal #' @noRd appleCol <- c( red = "#FF3B30", orange = "#FF9500", yellow = "#FFCC00", green = "#4CD964", tealBlue = "#5AC8FA", blue = "#007AFF", purple = "#5856D6", pink = "#FF2D55" ) # Google ---- #' Google Colors #' #' `googleCol`: Google brand palette (https://brandpalettes.com/google-colors/) #' #' @keywords internal #' @noRd googleCol <- c( blue = "#4285F4", red = "#DB4437", yellow = "#F4B400", green = "#0F9D58" ) # Amazon ---- #' Amazon Colors #' #' `amazonCol`: Amazon brand palette #' (https://images-na.ssl-images-amazon.com/images/G/01/AdvertisingSite/pdfs/AmazonBrandUsageGuidelines.pdf) #' #' @keywords internal #' @noRd amazonCol <- c( orange = "#FF9900", blue = "#146EB4" ) # Microsoft ---- #' Microsoft Colors #' #' `microsoftCol`: Microsoft brand palette #' (https://brandcolors.net/b/microsoft) #' #' @keywords internal #' @noRd microsoftCol <- c( orange = "#f65314", green = "#7cbb00", blue = "#00a1f1", yellow = "#ffbb00" ) # rtemis palettes ---- rtCol1 <- desaturate( c( ucsfCol[["C3_Teal"]], ucsfCol[["M3_Orange"]], pennCol[["lighterRed"]], pennCol[["lighterBlue"]], pennCol[["lighterOrange"]], pennCol[["lighterPurple"]], ucsfCol[["A3_CTA_Blue"]], pennCol[["lightestOrange"]], pennCol[["lightestPurple"]], pennCol[["blue"]], pennCol[["red"]], pennCol[["orange"]], pennCol[["purple"]] ), .3 ) rtCol1n <- desaturate( c( ucsfCol[["C3_Teal"]], ucsfCol[["M3_Orange"]], pennCol[["lighterBlue"]], pennCol[["lighterRed"]], pennCol[["lighterOrange"]], pennCol[["lighterPurple"]], pennCol[["lightestBlue"]], ucsfCol[["G4"]], pennCol[["lightestOrange"]], pennCol[["lightestPurple"]], pennCol[["blue"]], pennCol[["red"]], pennCol[["orange"]], pennCol[["purple"]] ), .3 ) rtCol2 <- c( ucsfCol[["C3_Teal"]], ucsfCol[["M3_Orange"]], ucsfCol[["H2"]], ucsfCol[["A2"]], ucsfCol[["C4"]], ucsfCol[["L3_Yellow"]], ucsfCol[["H3_Magenta"]], ucsfCol[["A3_CTA_Blue"]] ) rtms <- c( teal = ucsfCol[["C3_Teal"]], orange = ucsfCol[["M3_Orange"]], red = pennCol[["lighterRed"]], blue = pennCol[["lighterBlue"]], lighter_teal = ucsfCol[["C4"]], yellow = ucsfCol[["L3_Yellow"]], magenta = ucsfCol[["H3_Magenta"]], lighter_blue = ucsfCol[["A3_CTA_Blue"]] ) |> desaturate() rtcoldev <- c( rtemisblue = "#80ffff", rtemisbluetoo = "#00D6FF", lavender = "#ff80ffff", orange = "#ffb200ff" ) grays <- c( Gray10 = "gray10", Gray30 = "gray30", Gray50 = "gray50", Gray70 = "gray70", Gray90 = "gray90" ) # Pantone 2022 ---- pantoneBalancingAct <- c( Granite_Green = "#86A293", Muted_Clay = "#D29381", Very_Peri = "#6667AB", Hawthorne_Rose = "#884C5E", Dried_Moss = "#CCB97E", Elderberry = "#9D848E", Lotus = "#E3C1C0", Burnished_Lilac = "#C4AEB1" ) pantoneWellspring <- c( Eggshell_Blue = "#A1CAC9", Celery = "#CFBF54", Dewberry = "#8C5896", Chai_tea = "#B3832F", Greenbrier = "#48996B", Very_Peri = "#6667AB", Treetop = "#436A2F", Foliage = "#759F51" ) pantoneAmusements <- c( Tawny_Orange = "#D77E6F", Very_Peri = "#6667AB", Iced_Coffee = "#B38F6A", Pink_Flambe = "#D75078", Fuchsia_Pink = "#E288B6", Paradise_Pink = "#E9445D", Cornsilk = "#EEC272", Tourmaline = "#85A0A9" ) # rtemis_palettes ---- rtemis_palettes <- list( ucsfCol = ucsfCol, pennCol = pennCol, imperialCol = imperialCol, stanfordCol = stanfordCol, ucdCol = ucdCol, berkeleyCol = berkeleyCol, ucscCol = ucscCol, ucmercedCol = ucmercedCol, ucsbCol = ucsbCol, uclaCol = uclaCol, ucrColor = ucrColor, uciCol = uciCol, ucsdCol = ucsdCol, ucCol = ucCol, scrippsCol = scrippsCol, caltechCol = caltechCol, cmuCol = cmuCol, princetonCol = princetonCol, columbiaCol = columbiaCol, yaleCol = yaleCol, brownCol = brownCol, cornellCol = cornellCol, hmsCol = hmsCol, dartmouthCol = dartmouthCol, usfCol = usfCol, uwCol = uwCol, jhuCol = jhuCol, nyuCol = nyuCol, washuCol = washuCol, chicagoCol = chicagoCol, pennstateCol = pennstateCol, msuCol = msuCol, michiganCol = michiganCol, iowaCol = iowaCol, texasCol = texasCol, techCol = techCol, jeffersonCol = jeffersonCol, hawaiiCol = hawaiiCol, nihCol = nihCol, torontoCol = torontoCol, mcgillCol = mcgillCol, uclCol = uclCol, oxfordCol = oxfordCol, nhsCol = nhsCol, ethCol = ethCol, rwthCol = rwthCol, firefoxCol = firefoxCol, mozillaCol = mozillaCol, appleCol = appleCol, googleCol = googleCol, amazonCol = amazonCol, microsoftCol = microsoftCol, pantoneBalancingAct = pantoneBalancingAct, pantoneWellspring = pantoneWellspring, pantoneAmusements = pantoneAmusements, grays = grays, rtCol1 = rtCol1, rtms = rtms ) #' Get Color Palette #' #' `get_palette()` returns a color palette (character vector of colors). #' Without arguments, prints names of available color palettes. #' Each palette is a named list of hexadecimal color definitions which can be used with #' any graphics function. #' #' @param palette Character: Name of palette to return. Default = NULL: available palette #' names are printed and no palette is returned. #' @param verbosity Integer: Verbosity level. #' #' @return Character vector of colors for the specified palette, or invisibly returns #' list of available palettes if `palette = NULL`. #' #' @author EDG #' @export #' #' @examples #' # Print available palettes #' get_palette() #' # Get the Imperial palette #' get_palette("imperial") get_palette <- function(palette = NULL, verbosity = 1L) { if (is.null(palette)) { if (verbosity > 0L) { msg(highlight("The following palettes are available:")) print(names(rtemis_palettes)) } } else { palette <- match.arg( palette, names(rtemis_palettes) ) rtemis_palettes[[palette]] } } # /rtemis::get_palette # Xterm Colors ---- XtermCol <- c( `Black (SYSTEM)` = "#000000", `Maroon (SYSTEM)` = "#800000", `Green (SYSTEM)` = "#008000", `Olive (SYSTEM)` = "#808000", `Navy (SYSTEM)` = "#000080", `Purple (SYSTEM)` = "#800080", `Teal (SYSTEM)` = "#008080", `Silver (SYSTEM)` = "#c0c0c0", `Grey (SYSTEM)` = "#808080", `Red (SYSTEM)` = "#ff0000", `Lime (SYSTEM)` = "#00ff00", `Yellow (SYSTEM)` = "#ffff00", `Blue (SYSTEM)` = "#0000ff", `Fuchsia (SYSTEM)` = "#ff00ff", `Aqua (SYSTEM)` = "#00ffff", `White (SYSTEM)` = "#ffffff", Grey0 = "#000000", NavyBlue = "#00005f", DarkBlue = "#000087", Blue3 = "#0000af", Blue3 = "#0000d7", Blue1 = "#0000ff", DarkGreen = "#005f00", DeepSkyBlue4 = "#005f5f", DeepSkyBlue4 = "#005f87", DeepSkyBlue4 = "#005faf", DodgerBlue3 = "#005fd7", DodgerBlue2 = "#005fff", Green4 = "#008700", SpringGreen4 = "#00875f", Turquoise4 = "#008787", DeepSkyBlue3 = "#0087af", DeepSkyBlue3 = "#0087d7", DodgerBlue1 = "#0087ff", Green3 = "#00af00", SpringGreen3 = "#00af5f", DarkCyan = "#00af87", LightSeaGreen = "#00afaf", DeepSkyBlue2 = "#00afd7", DeepSkyBlue1 = "#00afff", Green3 = "#00d700", SpringGreen3 = "#00d75f", SpringGreen2 = "#00d787", Cyan3 = "#00d7af", DarkTurquoise = "#00d7d7", Turquoise2 = "#00d7ff", Green1 = "#00ff00", SpringGreen2 = "#00ff5f", SpringGreen1 = "#00ff87", MediumSpringGreen = "#00ffaf", Cyan2 = "#00ffd7", Cyan1 = "#00ffff", DarkRed = "#5f0000", DeepPink4 = "#5f005f", Purple4 = "#5f0087", Purple4 = "#5f00af", Purple3 = "#5f00d7", BlueViolet = "#5f00ff", Orange4 = "#5f5f00", Grey37 = "#5f5f5f", MediumPurple4 = "#5f5f87", SlateBlue3 = "#5f5faf", SlateBlue3 = "#5f5fd7", RoyalBlue1 = "#5f5fff", Chartreuse4 = "#5f8700", DarkSeaGreen4 = "#5f875f", PaleTurquoise4 = "#5f8787", SteelBlue = "#5f87af", SteelBlue3 = "#5f87d7", CornflowerBlue = "#5f87ff", Chartreuse3 = "#5faf00", DarkSeaGreen4 = "#5faf5f", CadetBlue = "#5faf87", CadetBlue = "#5fafaf", SkyBlue3 = "#5fafd7", SteelBlue1 = "#5fafff", Chartreuse3 = "#5fd700", PaleGreen3 = "#5fd75f", SeaGreen3 = "#5fd787", Aquamarine3 = "#5fd7af", MediumTurquoise = "#5fd7d7", SteelBlue1 = "#5fd7ff", Chartreuse2 = "#5fff00", SeaGreen2 = "#5fff5f", SeaGreen1 = "#5fff87", SeaGreen1 = "#5fffaf", Aquamarine1 = "#5fffd7", DarkSlateGray2 = "#5fffff", DarkRed = "#870000", DeepPink4 = "#87005f", DarkMagenta = "#870087", DarkMagenta = "#8700af", DarkViolet = "#8700d7", Purple = "#8700ff", Orange4 = "#875f00", LightPink4 = "#875f5f", Plum4 = "#875f87", MediumPurple3 = "#875faf", MediumPurple3 = "#875fd7", SlateBlue1 = "#875fff", Yellow4 = "#878700", Wheat4 = "#87875f", Grey53 = "#878787", LightSlateGrey = "#8787af", MediumPurple = "#8787d7", LightSlateBlue = "#8787ff", Yellow4 = "#87af00", DarkOliveGreen3 = "#87af5f", DarkSeaGreen = "#87af87", LightSkyBlue3 = "#87afaf", LightSkyBlue3 = "#87afd7", SkyBlue2 = "#87afff", Chartreuse2 = "#87d700", DarkOliveGreen3 = "#87d75f", PaleGreen3 = "#87d787", DarkSeaGreen3 = "#87d7af", DarkSlateGray3 = "#87d7d7", SkyBlue1 = "#87d7ff", Chartreuse1 = "#87ff00", LightGreen = "#87ff5f", LightGreen = "#87ff87", PaleGreen1 = "#87ffaf", Aquamarine1 = "#87ffd7", DarkSlateGray1 = "#87ffff", Red3 = "#af0000", DeepPink4 = "#af005f", MediumVioletRed = "#af0087", Magenta3 = "#af00af", DarkViolet = "#af00d7", Purple = "#af00ff", DarkOrange3 = "#af5f00", IndianRed = "#af5f5f", HotPink3 = "#af5f87", MediumOrchid3 = "#af5faf", MediumOrchid = "#af5fd7", MediumPurple2 = "#af5fff", DarkGoldenrod = "#af8700", LightSalmon3 = "#af875f", RosyBrown = "#af8787", Grey63 = "#af87af", MediumPurple2 = "#af87d7", MediumPurple1 = "#af87ff", Gold3 = "#afaf00", DarkKhaki = "#afaf5f", NavajoWhite3 = "#afaf87", Grey69 = "#afafaf", LightSteelBlue3 = "#afafd7", LightSteelBlue = "#afafff", Yellow3 = "#afd700", DarkOliveGreen3 = "#afd75f", DarkSeaGreen3 = "#afd787", DarkSeaGreen2 = "#afd7af", LightCyan3 = "#afd7d7", LightSkyBlue1 = "#afd7ff", GreenYellow = "#afff00", DarkOliveGreen2 = "#afff5f", PaleGreen1 = "#afff87", DarkSeaGreen2 = "#afffaf", DarkSeaGreen1 = "#afffd7", PaleTurquoise1 = "#afffff", Red3 = "#d70000", DeepPink3 = "#d7005f", DeepPink3 = "#d70087", Magenta3 = "#d700af", Magenta3 = "#d700d7", Magenta2 = "#d700ff", DarkOrange3 = "#d75f00", IndianRed = "#d75f5f", HotPink3 = "#d75f87", HotPink2 = "#d75faf", Orchid = "#d75fd7", MediumOrchid1 = "#d75fff", Orange3 = "#d78700", LightSalmon3 = "#d7875f", LightPink3 = "#d78787", Pink3 = "#d787af", Plum3 = "#d787d7", Violet = "#d787ff", Gold3 = "#d7af00", LightGoldenrod3 = "#d7af5f", Tan = "#d7af87", MistyRose3 = "#d7afaf", Thistle3 = "#d7afd7", Plum2 = "#d7afff", Yellow3 = "#d7d700", Khaki3 = "#d7d75f", LightGoldenrod2 = "#d7d787", LightYellow3 = "#d7d7af", Grey84 = "#d7d7d7", LightSteelBlue1 = "#d7d7ff", Yellow2 = "#d7ff00", DarkOliveGreen1 = "#d7ff5f", DarkOliveGreen1 = "#d7ff87", DarkSeaGreen1 = "#d7ffaf", Honeydew2 = "#d7ffd7", LightCyan1 = "#d7ffff", Red1 = "#ff0000", DeepPink2 = "#ff005f", DeepPink1 = "#ff0087", DeepPink1 = "#ff00af", Magenta2 = "#ff00d7", Magenta1 = "#ff00ff", OrangeRed1 = "#ff5f00", IndianRed1 = "#ff5f5f", IndianRed1 = "#ff5f87", HotPink = "#ff5faf", HotPink = "#ff5fd7", MediumOrchid1 = "#ff5fff", DarkOrange = "#ff8700", Salmon1 = "#ff875f", LightCoral = "#ff8787", PaleVioletRed1 = "#ff87af", Orchid2 = "#ff87d7", Orchid1 = "#ff87ff", Orange1 = "#ffaf00", SandyBrown = "#ffaf5f", LightSalmon1 = "#ffaf87", LightPink1 = "#ffafaf", Pink1 = "#ffafd7", Plum1 = "#ffafff", Gold1 = "#ffd700", LightGoldenrod2 = "#ffd75f", LightGoldenrod2 = "#ffd787", NavajoWhite1 = "#ffd7af", MistyRose1 = "#ffd7d7", Thistle1 = "#ffd7ff", Yellow1 = "#ffff00", LightGoldenrod1 = "#ffff5f", Khaki1 = "#ffff87", Wheat1 = "#ffffaf", Cornsilk1 = "#ffffd7", Grey100 = "#ffffff", Grey3 = "#080808", Grey7 = "#121212", Grey11 = "#1c1c1c", Grey15 = "#262626", Grey19 = "#303030", Grey23 = "#3a3a3a", Grey27 = "#444444", Grey30 = "#4e4e4e", Grey35 = "#585858", Grey39 = "#626262", Grey42 = "#6c6c6c", Grey46 = "#767676", Grey50 = "#808080", Grey54 = "#8a8a8a", Grey58 = "#949494", Grey62 = "#9e9e9e", Grey66 = "#a8a8a8", Grey70 = "#b2b2b2", Grey74 = "#bcbcbc", Grey78 = "#c6c6c6", Grey82 = "#d0d0d0", Grey85 = "#dadada", Grey89 = "#e4e4e4", Grey93 = "#eeeeee" ) ================================================ FILE: R/utils_plot.R ================================================ # plotops.R # ::rtemis:: # 2020- EDG rtemis.org #' @keywords internal #' @noRd getlim <- function(x, axs = c("r", "i"), axs.r.pct = .04) { axs <- match.arg(axs) .x <- na.exclude(x) .min <- min(.x) .max <- max(.x) if (axs == "r") { .diff <- .max - .min c(.min - axs.r.pct * .diff, .max + axs.r.pct * .diff) } else { c(.min, .max) } } # /rtemis::getlim ================================================ FILE: R/utils_plotly.R ================================================ # utils_plotly # ::rtemis:: # 2021- EDG rtemis.org # plotly_vline calls plotly_vline1 to create a list for one or more vertical # lines, to be passed to plotly::layout's shapes argument plotly_vline1 <- function(x, color = "#F48024", width = 1, dash = "dot") { list( type = "line", x0 = x, x1 = x, y0 = 0, y1 = 1, yref = "paper", line = list( color = color, width = width, dash = dash ) ) } # Calls plotly_vline1 for each x value plotly_vline <- function(x, color = "#F48024", width = 1, dash = "dot") { color <- recycle(color, x) width <- recycle(width, x) dash <- recycle(dash, x) mapply( plotly_vline1, x, color = color, width = width, dash = dash, SIMPLIFY = FALSE ) } # plotly_hline calls plotly_hline1 to create a list for one or more horizontal # lines, to be passed to plotly::layout's shapes argument plotly_hline1 <- function(y, color = "#F48024", width = 1, dash = "dot") { list( type = "line", x0 = 0, x1 = 1, y0 = y, y1 = y, xref = "paper", line = list( color = color, width = width, dash = dash ) ) } plotly_hline <- function(y, color = "#F48024", width = 1, dash = "dot") { color <- recycle(color, y) width <- recycle(width, y) dash <- recycle(dash, y) mapply( plotly_hline1, y, color = color, width = width, dash = dash, SIMPLIFY = FALSE ) } # bracket y values for boxplot htest bracket_y <- function(x, pad = c(.04, .05)) { l <- max(x) + (diff(range(x)) * pad) c(l, rev(l)) } starbracket_y <- function(x, pad = c(.04, .05, .09)) { l <- max(x) + (diff(range(x)) * pad) list(star = l[3], bracket = c(l[1:2], rev(l[1:2]))) } # plotly shade plotly_shade <- function( plt, x, ypos, yneg, col, alpha = 1, legendgroup = NA, showlegend = FALSE ) { plt <- plotly::add_trace( plt, x = x, y = ypos, # type = scatter.type, mode = "lines", line = list(color = "transparent"), legendgroup = legendgroup, showlegend = showlegend, hoverinfo = "none", inherit = FALSE ) plt <- plotly::add_trace( plt, x = x, y = yneg, # type = scatter.type, mode = "lines", fill = "tonexty", fillcolor = plotly::toRGB(col, alpha = alpha), line = list(color = "transparent"), legendgroup = legendgroup, showlegend = showlegend, hoverinfo = "none", inherit = FALSE ) } #' Export plotly plot to file #' #' @param x plotly object. #' @param filename Character: Filename to save the plot to. #' @param width Numeric: Width of the exported image in pixels. #' @param height Numeric: Height of the exported image in pixels. #' @param scale Numeric: Scale factor for the exported image. #' @param import_kaleido Logical: If TRUE, attempts to import kaleido for exporting plotly plots. #' @param verbosity Integer: Verbosity level. #' #' @return NULL #' #' @author EDG #' @keywords internal #' @noRd export_plotly <- function( x, filename, width = 600, height = 600, scale = 1, import_kaleido = TRUE, verbosity = 1L ) { # Import kaleido if (import_kaleido) { tryCatch( { reticulate::py_run_string("import kaleido") cat("Kaleido is available for plotly exports.\n") }, error = function(e) { cat("Installing kaleido for plotly exports...\n") reticulate::py_install("kaleido") reticulate::py_run_string("import kaleido") cat("Kaleido installed successfully.\n") } ) } # Intro if (verbosity > 0L) { msgstart("Exporting plotly plot to ", filename, "...") } # Export to file ---- filename <- normalizePath(filename, mustWork = FALSE) # Create parent directory if it doesn't exist parent_dir <- dirname(filename) if (!dir.exists(parent_dir)) { dir.create(parent_dir, recursive = TRUE) if (!dir.exists(parent_dir)) { cli::cli_abort( "Failed to create directory {.file {parent_dir}}. Check path & permissions." ) } } plotly::save_image( x, file = filename, width = width, height = height, scale = scale ) # Check if the file was created if (!file.exists(filename)) { cli::cli_abort( "Failed to save plotly plot to {.file {filename}}. Check if the file path is correct and writable." ) } else { if (verbosity > 0L) { msgdone() } } } # /rtemis::export_plotly ================================================ FILE: R/utils_print.R ================================================ # print_ops.R # ::rtemis:: # 2016-23 EDG rtemis.org is_common_struct <- function(x) { class(x)[1] %in% c( "numeric", "integer", "character", "logical", "factor", "Date", "POSIXct", "POSIXlt", "list", "data.frame", "matrix", "array", "table", "ts", "tbl_df", "data.table" ) } #' Pretty print list #' #' Pretty print a list (or data frame) recursively #' #' Data frames in R began life as lists #' #' @param x list or object that will be converted to a list. #' @param prefix Character: Optional prefix for names. #' @param pad Integer: Pad output with this many spaces. #' @param item_format Formatting function for list item names. #' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis. #' @param center_title Logical: If TRUE, autopad title for centering, if present. #' @param title Character: Optional title to print before list. #' @param title_newline Logical: If TRUE, print title on new line. #' @param newline_pre Logical: If TRUE, print newline before list. #' @param format_fn_rhs Formatting function for right-hand side values. #' @param print_class Logical: If TRUE, print abbreviated class of object. #' @param abbrev_class_n Integer: Number of characters to abbreviate class names to. #' @param print_df Logical: If TRUE, print data frame contents, otherwise print n rows and columns. #' @param print_S4 Logical: If TRUE, print S4 object contents, otherwise print class name. #' @param limit Integer: Maximum number of items to show. Use -1 for unlimited. #' #' @author EDG #' #' @keywords internal #' @noRd printls <- function( x, prefix = "", pad = 2L, item_format = bold, maxlength = 4L, center_title = TRUE, title = NULL, title_newline = TRUE, newline_pre = FALSE, format_fn_rhs = ddSci, print_class = TRUE, abbrev_class_n = 3L, print_df = FALSE, print_S4 = FALSE, limit = 12L ) { # Arguments ---- if (newline_pre) { cat("\n") } if (is.null(x)) { if (!is.null(title)) { padcat(title, pad = pad, newline = title_newline, newline_pre = FALSE) } cat(strrep(" ", pad), "NULL", sep = "") } else if (length(x) == 0) { cat(class(x), "of length 0.\n") } else if (is.data.frame(x) && !print_df) { cat( "data.frame with", NROW(x), "rows and", NCOL(x), "columns.\n" ) } else if (!is_common_struct(x)) { cat("object of class:", class(x), "\n") } else { x <- as.list(x) # Get class of each element classes_ <- sapply(x, class) # Remove closures that will cause error is_fn <- which(sapply(x, is.function)) if (length(is_fn) > 0) { for (i in is_fn) { x[[i]] <- paste0(as.character(head(deparse(x[[i]]), n = 1L)), "...") } } # Remove NULLs null_index <- sapply(x, is.null) x[null_index] <- "NULL" xnames <- names(x) lhs <- max(nchar(paste0(prefix, xnames))) + pad if (!is.null(title)) { title_pad <- if (center_title) { max(0, lhs - round((.5 * nchar(title))) - 3) } else { 0 } padcat( title, pad = title_pad, newline = title_newline, newline_pre = FALSE ) } # /title counter <- 0L # Print each item up to limit items if (limit != -1L && length(x) > limit) { padcat( italic(gray( paste( "Showing first", limit, "of", length(x), "items.\n" ) )), pad = pad ) } for (i in seq_along(x)) { counter <- counter + 1L if (limit != -1L && counter > limit) { padcat( italic(gray( paste0( "...", length(x) - limit, " more items not shown.\n" ) )), pad = pad ) break } # Print item if (is.list(x[[i]])) { if (length(x[[i]]) == 0) { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": ", format_fn_rhs("(empty list)"), "\n" )) } else { cat( paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": " ) ) if (is_common_struct(x[[i]])) { printls( x[[i]], pad = lhs + 2, newline_pre = TRUE, print_df = print_df ) } else { cat( italic("object of class:", class(x[[i]])), "\n" ) } } } else if (is.logical(x[[i]])) { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": ", if (print_class) { gray(paste0("<", abbreviate("logical", abbrev_class_n), "> ")) }, ifelse(isTRUE(x[[i]]), "TRUE", "FALSE"), "\n" )) } else if (S7_inherits(x[[i]])) { cat( paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": " ), "\n" ) # Print S7 object print(x[[i]], pad = lhs + 2) } else if (is.data.frame(x[[i]])) { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": ", if (print_class) { gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> ")) }, headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs), "\n" )) } else if (isS4(x[[i]])) { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": " )) # Print S4 object if (print_S4) { cat("\n") print(x[[i]]) } else { cat("(S4 object of class: '", class(x[[i]]), "')\n", sep = "") } } else if (!is_common_struct(x[[i]])) { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": ", if (print_class) { gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> ")) }, italic("object of class:", class(x[[i]])), "\n" )) } else { cat(paste0( item_format(format( paste0(prefix, xnames[i]), width = lhs, justify = "right" )), ": ", if (print_class) { gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> ")) }, headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs), "\n" )) } } } } # /rtemis::printls # printdf1 # ::rtemis:: # 2016 rtemis.org #' Print 1 x N data frame #' #' Pretty print a data frame containing 1 row of data with named columns as a vertical list #' of " name : value" #' " other.name : other.value" #' #' @param x data frame #' @param pad Integer: Pad output with this many spaces. #' #' @author EDG #' @keywords internal #' @noRd printdf1 <- function(x, pad = 2) { x <- as.data.frame(x) # df <- data.frame(Parameter = c(names(x)), Value = unlist(x), row.names = NULL) xnames <- colnames(x) lhs <- max(nchar(xnames)) + pad for (i in seq_len(ncol(x))) { cat( paste(format(xnames[i], width = lhs, justify = "right"), ":", x[1, i]), "\n" ) } } # /rtemis::printdf1 #' Show data.frame #' #' Create a pretty text representation of a data.frame. #' #' @param x data frame #' @param pad Integer: Pad output with this many spaces. #' @param spacing Integer: Number of spaces between columns. #' @param ddSci_dp Integer: Number of decimal places to print using [ddSci]. Default = NULL for no #' formatting #' @param transpose Logical: If TRUE, transpose `x` before printing. #' @param justify Character: "right", "left". #' @param colnames Logical: If TRUE, print column names. #' @param rownames Logical: If TRUE, print row names. #' @param colnames_formatter Format function for printing column names. #' @param rownames_formatter Format function for printing row names. #' @param newline_pre Logical: If TRUE, print a new line before printing data frame. #' @param newline Logical: If TRUE, print a new line after printing data frame. #' #' @author EDG #' @keywords internal #' @noRd show_df <- function( x, pad = 0L, spacing = 1L, ddSci_dp = NULL, transpose = FALSE, justify = "right", incl_colnames = TRUE, incl_rownames = TRUE, colnames_formatter = highlight, rownames_formatter = gray, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) if (transpose) { x <- as.data.frame(t(x)) } xnames <- colnames(x) xrownames <- gsub(pattern = "\\.", replacement = " ", rownames(x)) if (!is.null(ddSci_dp)) { xf <- as.data.frame(matrix(ddSci(x, decimal_places = ddSci_dp), NROW(x))) colnames(xf) <- xnames rownames(xf) <- xrownames x <- xf } col_char <- sapply(seq_along(xnames), \(i) { max(nchar(as.character(x[, i])), nchar(xnames[i])) }) xrownames_spacing <- if (incl_rownames) { max(nchar(xrownames)) + pad } else { pad } spacer <- strrep(" ", spacing) out <- character() if (incl_colnames) { out <- paste0( out, strrep(" ", xrownames_spacing) ) if (justify == "left") { out <- paste0(out, spacer) } for (i in seq_len(NCOL(x))) { out <- paste0( out, colnames_formatter( format( xnames[i], width = col_char[i] + spacing, justify = justify ), output_type = output_type ) ) } out <- paste0(out, "\n") } # Row names if (incl_rownames) { for (i in seq_len(NROW(x))) { out <- paste0( out, rownames_formatter( format( xrownames[i], width = xrownames_spacing, justify = "right" ), output_type = output_type ) ) for (j in seq_len(NCOL(x))) { out <- paste0( out, spacer, paste(format(x[i, j], width = col_char[j], justify = justify)) ) } out <- paste0(out, "\n") } } else { for (i in seq_len(NROW(x))) { for (j in seq_len(NCOL(x))) { out <- paste0( out, spacer, paste(format(x[i, j], width = col_char[j], justify = justify)) ) } out <- paste0(out, "\n") } } out } # /rtemis::show_df #' Show table #' #' @param x table. #' @param spacing Integer: Number of spaces between columns. #' @param pad Integer: Pad output with this many spaces. #' #' @return Character: formatted string. #' #' @author EDG #' #' @keywords internal #' @noRd show_table <- function( x, spacing = 2L, pad = 2L, formatter = highlight, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) dim_names <- names(attr(x, "dimnames")) class_names <- attr(x, "dimnames")[["Reference"]] n_classes <- NCOL(x) mat <- matrix(c(x), NROW(x)) colnames(mat) <- colnames(x) rownames(mat) <- rownames(x) # Column width without spacing col.width <- sapply(seq_along(class_names), \(i) { max(nchar(as.character(x[, i])), nchar(class_names[i])) }) lhspad <- max(nchar(class_names), nchar(dim_names[1])) + spacing + pad # Top dimname formatted_dimname <- format( dim_names[2], width = lhspad + nchar(dim_names[2]), justify = "right" ) out <- paste0( bold(formatted_dimname, output_type = output_type), "\n" ) # Left dimname formatted_dimname1 <- format( dim_names[1], width = lhspad - spacing, justify = "right" ) out <- paste0( out, bold(formatted_dimname1, output_type = output_type), strrep(" ", spacing) ) # Column names # (Continue on same row as left dimname) for (i in seq_len(n_classes)) { formatted_classname <- format( class_names[i], width = col.width[i] + spacing, justify = "left" ) out <- paste0( out, formatter(formatted_classname, output_type = output_type) ) } # Add Confusion matrix excluding colnames that are already added out <- paste0( out, "\n", show_df( mat, pad = lhspad - max(nchar(class_names)) - spacing, incl_colnames = FALSE, spacing = spacing, colnames_formatter = formatter, rownames_formatter = formatter, output_type = output_type ) ) out } # /rtemis::show_table #' @keywords internal #' @noRd pastels <- function(x, bullet = " -") { paste(paste(bullet, x, collapse = "\n"), "\n") } # /rtemis::pastels #' Get first few elements of a vector with ellipsis #' #' @details #' Used, for example, by `repr_ls` #' #' @return Character. #' #' @keywords internal #' @noRd headdot <- function(x, maxlength = 6L, format_fn = identity) { if (maxlength == -1L || length(x) < maxlength) { paste(format_fn(x), collapse = ", ") } else { paste0( paste(format_fn(head(as.vector(x), n = maxlength)), collapse = ", "), "..." ) } } # /rtemis::headdot #' Print Size #' #' Get `NCOL(x)` and \code{NROW{x}} #' #' @param x R object (usually that inherits from matrix or data.frame) #' @param name Character: Name of input object #' @param verbosity Integer: Verbosity level. #' @param newline Logical: If TRUE, end with new line character. #' #' @return vector of NROW, NCOL invisibly #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' catsize(iris) catsize <- function(x, name = NULL, verbosity = 1L, newline = TRUE) { if (inherits(x, c("matrix", "data.frame"))) { .nrow <- NROW(x) .ncol <- NCOL(x) nrows <- format(.nrow, big.mark = ",") ncols <- format(.ncol, big.mark = ",") if (verbosity > 0L) { pcat( name, paste(highlight(nrows), "x", highlight(ncols)), newline = newline ) } invisible(c(.nrow, .ncol)) } else { .nels <- length(x) nels <- format(.nels, big.mark = ",") if (verbosity > 0L) { cat( # "There", # ngettext(.nels, "is", "are"), name, highlight(nels), # ngettext(.nels, "element", "elements"), if (newline) "\n" ) } invisible(.nels) } } # /rtemis::catsize #' @author EDG #' @keywords internal #' @noRd list2text <- function(x, sep = ": ", line = "\n") { .names <- names(x) sapply(seq_along(x), \(i) { paste0(.names[i], sep, x[[i]], line) }) |> paste0(collapse = "") } # /rtemis::list2text #' List to HTML #' #' @author EDG #' @keywords internal #' @noRd list2html <- function( x, sep = ": ", col = "#16A0AC", key_weight = 100, value_weight = 300, line = "
" ) { .names <- names(x) sapply(seq_along(x), \(i) { paste0( span(.names[i], style = paste0("font-weight:", key_weight, ";")), sep, span( x[[i]], style = paste0("color:", col, "; font-weight:", value_weight, ";") ), line ) }) |> paste0(collapse = "") |> htmltools::HTML() } # /rtemis::list2html #' Helper function to build padded string equivalent of padcat #' #' @param text Character: Text to pad. #' @param pad Integer: Number of spaces to pad. #' @param newline_pre Logical: If TRUE, add newline before text. #' @param newline Logical: If TRUE, add newline after text. #' #' @return Character: Padded string. #' #' @author EDG #' @keywords internal #' @noRd show_padded <- function( text, pad = 2L, newline_pre = FALSE, newline = FALSE ) { result <- "" if (newline_pre) { result <- paste0(result, "\n") } result <- paste0(result, strrep(" ", pad)) result <- paste0(result, text) if (newline) { result <- paste0(result, "\n") } result } #' Show list as formatted string #' #' Works exactly like printls, but instead of printing to console with cat, #' it outputs a single string, formatted using mformat, so that cat(repr_ls(x)) #' looks identical to printls(x) for any list x #' #' @param x list or object that will be converted to a list. #' @param prefix Character: Optional prefix for names. #' @param pad Integer: Pad output with this many spaces. #' @param item_format Formatting function for items. #' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis. #' @param center_title Logical: If TRUE, autopad title for centering, if present. #' @param title Character: Title to print before list. #' @param title_newline Logical: If TRUE, print title on new line. #' @param newline_pre Logical: If TRUE, print newline before list. #' @param format_fn_rhs Formatting function for right-hand side of items. #' @param print_class Logical: If TRUE, print abbreviated class of object. #' @param abbrev_class_n Integer: Number of characters to abbreviate class names to. #' @param print_df Logical: If TRUE, print data frame contents, otherwise print n rows and columns. #' @param print_S4 Logical: If TRUE, print S4 object contents, otherwise print class name. #' @param limit Integer: Maximum number of items to show. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted string that can be printed with cat() #' #' @author EDG #' #' @keywords internal #' @noRd repr_ls <- function( x, prefix = "", pad = 2L, item_format = bold, maxlength = 4L, center_title = TRUE, title = NULL, title_newline = TRUE, newline_pre = FALSE, format_fn_rhs = ddSci, print_class = TRUE, abbrev_class_n = 3L, print_df = FALSE, print_S4 = FALSE, limit = 12L, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) # Initialize output string result <- "" # Arguments ---- if (newline_pre) { result <- paste0(result, "\n") } if (is.null(x)) { if (!is.null(title)) { result <- paste0( result, show_padded(title, pad = pad, newline = title_newline) ) } result <- paste0(result, strrep(" ", pad), "NULL") } else if (length(x) == 0) { result <- paste0(result, class(x)[1], " of length 0.\n") } else if (is.data.frame(x) && !print_df) { result <- paste0( result, show_padded("(data.frame with ", pad = pad), NROW(x), " rows and ", NCOL(x), " columns.)\n" ) } else if (!is_common_struct(x)) { result <- paste0( result, "object of class: ", paste(class(x), collapse = ", "), "\n" ) } else { x <- as.list(x) # Get class of each element classes_ <- sapply(x, function(el) class(el)[[1L]]) # Deparse closures that would cause error is_fn <- which(sapply(x, is.function)) if (length(is_fn) > 0) { for (i in is_fn) { x[[i]] <- paste0(as.character(head(deparse(x[[i]]), n = 1L)), "...") } } # Set NULLs to "NULL" null_index <- sapply(x, is.null) x[null_index] <- "NULL" xnames <- names(x) lhs <- max(nchar(paste0(prefix, xnames))) + pad if (!is.null(title)) { title_pad <- if (center_title) { max(0, lhs - round((.5 * nchar(title))) - 3) } else { 0 } result <- paste0( result, show_padded(title, pad = title_pad, newline = title_newline) ) } # /title counter <- 0L # Print each item up to limit items if (limit != -1L && length(x) > limit) { limit_text <- paste0( italic( gray( paste0( "Showing first ", limit, " of ", length(x), " items.\n" ), output_type = output_type ), output_type = output_type ) ) result <- paste0(result, show_padded(limit_text, pad = pad)) } for (i in seq_along(x)) { counter <- counter + 1L if (limit != -1L && counter > limit) { more_text <- paste0( italic( gray( paste0( "...", length(x) - limit, " more items not shown.\n" ) ), output_type = output_type ) ) result <- paste0(result, show_padded(more_text, pad = pad)) break } # Print item if (is.list(x[[i]])) { if (length(x[[i]]) == 0) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": ", format_fn_rhs("(empty list)"), "\n" ) result <- paste0(result, item_text) } else { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": " ) result <- paste0(result, item_text) if (is_common_struct(x[[i]])) { sub_result <- repr_ls( x[[i]], pad = lhs + 2, item_format = item_format, newline_pre = TRUE, # important format_fn_rhs = format_fn_rhs, print_class = print_class, limit = limit, output_type = output_type ) result <- paste0(result, sub_result) } else { result <- paste0( result, italic( paste( "object of class:", paste(class(x[[i]]), collapse = ", ") ), output_type = output_type ), "\n" ) } } } else if (is.logical(x[[i]])) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": ", if (print_class) { gray( paste0("<", abbreviate("logical", abbrev_class_n), "> "), output_type = output_type ) } else { "" }, ifelse(isTRUE(x[[i]]), "TRUE", "FALSE"), "\n" ) result <- paste0(result, item_text) } else if (S7_inherits(x[[i]])) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ":\n" # S7 show begin on next line, otherwise must have different pad for first line (S7name) and for rest ) result <- paste0(result, item_text) # Show S7 object: repr() must return a character string of length 1 s7_output <- tryCatch( { repr(x[[i]], pad = lhs + 2, output_type = output_type) }, error = function(e) { paste0( "(S7 object of class: '", paste(class(x[[i]]), collapse = ", "), "')\n" ) } ) result <- paste0(result, s7_output) } else if (is.data.frame(x[[i]])) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": ", if (print_class) { gray( paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "), output_type = output_type ) } else { "" }, headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs), "\n" ) result <- paste0(result, item_text) } else if (isS4(x[[i]])) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": " ) result <- paste0(result, item_text) # Print S4 object if (print_S4) { result <- paste0(result, "\n") # For S4 objects, we would need to capture their print output # This is complex, so for now we'll just show the class result <- paste0( result, "(S4 object of class: '", paste(class(x[[i]]), collapse = ", "), "')\n" ) } else { result <- paste0( result, "(S4 object of class: '", paste(class(x[[i]]), collapse = ", "), "')\n" ) } } else if (!is_common_struct(x[[i]])) { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": ", if (print_class) { gray( paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "), output_type = output_type ) } else { "" }, italic( paste( "object of class:", paste(class(x[[i]]), collapse = ", ") ), output_type = output_type ), "\n" ) result <- paste0(result, item_text) } else { item_text <- paste0( item_format( format( paste0(prefix, xnames[i]), width = lhs, justify = "right" ), output_type = output_type ), ": ", if (print_class) { gray( paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "), output_type = output_type ) } else { "" }, headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs), "\n" ) result <- paste0(result, item_text) } } } result } # /rtemis::repr_ls # %% inspect.class_data.frame ---- method(inspect, class_data.frame) <- function(x) { out <- paste0( fmt("<", col = "#808080"), fmt(class(x)[[1L]], col = highlight_col, bold = TRUE), fmt(">", col = "#808080"), " ", fmt(NROW(x), bold = TRUE), fmt(" x ", col = "#808080"), fmt(NCOL(x), bold = TRUE), "\n", repr_ls(x, pad = 0L, print_class = TRUE, print_df = TRUE) ) cat(out) invisible(out) } # /rtemis::inspect.class_data.frame ================================================ FILE: R/utils_rt.R ================================================ # info # ::rtemis:: # 2016- EDG rtemis.org #' `rtemis-internals`: `intro` #' #' Intro #' #' Starts function execution timer and opens log file. #' Pairs with `outro`. #' #' @keywords internal #' @noRd intro <- function( .message = "\u25b6", logfile = NULL, call_depth = 1, caller = NULL, newline_pre = FALSE, use_sink = FALSE, verbosity = 1L ) { if (!is.null(logfile)) { logfile <- normalizePath(logfile, mustWork = FALSE) outdir <- dirname(logfile) if (!dir.exists(outdir)) { dir.create(outdir, showWarnings = FALSE, recursive = TRUE) } if (use_sink) { sink(logfile, append = TRUE, split = verbosity > 0L) } log_to_file("Started.", logfile = logfile) } start_time <- proc.time() if (verbosity > 0L || !is.null(logfile)) { if (newline_pre) { cat("\n") } msg( .message, call_depth = call_depth, sep = "", caller_id = 2, caller = caller ) } invisible(start_time) } # /rtemis::intro # Function to output seconds if seconds < 60, otherwise output minutes #' @keywords internal #' @noRd format_seconds <- function(seconds) { if (seconds < 60) { paste0(bold(ddSci(seconds)), " seconds") } else { paste0(bold(ddSci(round(seconds / 60))), " minutes") } } #' `rtemis-internals`: `outro` #' #' Outro #' #' Stops function execution timer and closes log file. #' #' Second part to `intro` #' #' @keywords internal #' @noRd outro <- function( start_time, message = NULL, sink_off = FALSE, logfile = NULL, newline_pre = FALSE, real_user_system = FALSE, verbosity = 1L ) { elapsed <- as.numeric(proc.time() - start_time) if (verbosity > 0L || sink_off) { if (newline_pre) { cat("\n") } if (real_user_system) { msg0( paste0( checkmark(), " Done in ", format_seconds(elapsed[3]), " (", "Real:", ddSci(elapsed[3]), "/User:", ddSci(elapsed[1]), "/System:", ddSci(elapsed[2]), ")." ), caller_id = 2 ) } else { msg0( paste0( checkmark(), " Done in ", format_seconds(elapsed[3]), "." ), caller_id = 2 ) } } if (sink_off) { sink() } if (!is.null(logfile)) { log_to_file("Done.", logfile = logfile) } invisible(elapsed) } # /rtemis::outro #' Summarize supervised inputs #' #' @param x tabular data: Training set data. #' @param dat_validation data.frame or similar: Validation set data. #' @param dat_test data.frame or similar: Test set data. #' #' @author EDG #' @keywords internal #' @noRd summarize_supervised <- function( x, dat_validation = NULL, dat_test = NULL ) { # msg("Input data summary:") msg0( if (!is.null(dat_validation)) " ", "Training set: ", highlight(NROW(x)), " cases x ", highlight(NCOL(x) - 1), " features." ) if (!is.null(dat_validation)) { msg0( "Validation set: ", highlight(NROW(dat_validation)), " cases x ", highlight(NCOL(dat_validation) - 1), " features." ) } if (!is.null(dat_test)) { msg0( if (!is.null(dat_validation)) " ", " Test set: ", highlight(NROW(dat_test)), " cases x ", highlight(NCOL(dat_test) - 1), " features." ) } } # /rtemis::summarize_supervised #' Summarize unsupervised inputs #' #' @param x tabular data: Training set data. #' #' @author EDG #' @keywords internal #' @noRd summarize_unsupervised <- function(x) { msg( "Input:", highlight(NROW(x)), "cases x", highlight(NCOL(x)), "features." ) } # /rtemis::summarize_unsupervised #' Log to file #' #' @param x Character: Message to log. #' @param logfile Character: Path to log file. #' #' @author EDG #' #' @keywords internal #' @noRd log_to_file <- function(x, logfile) { cat( paste0( datetime(), " ", x, "\n" ), file = logfile, append = TRUE ) } # /rtemis::log_to_file ================================================ FILE: R/utils_rules.R ================================================ # utils_rules.R # ::rtemis:: # EDG rtemis.org #' Match Rules to Cases #' #' @param x Matrix / data frame: Input features #' @param rules Character vector: Rules #' @param verbosity Integer: Verbosity level. #' #' @return cases-by-rules matrix (binary; 1: match, 0: no match) #' @author EDG #' #' @keywords internal #' @noRd match_cases_by_rules <- function(x, rules, prefix = "Rule_", verbosity = 1L) { n_cases <- NROW(x) n_rules <- length(rules) if (!is.data.table(x)) { # {data.table} x <- data.table::as.data.table(x) } else { # Either make copy, or set ID to NULL before exit # x <- copy(x) on.exit(x[, ID := NULL]) } # appease R CMD check ID <- NULL x[, ID := seq_len(n_cases)] cxr <- matrix(0, n_cases, n_rules) if (verbosity > 0L) { msgstart( "Matching", highlight(n_rules), "rules to", highlight(n_cases), "cases..." ) } for (i in seq_along(rules)) { match <- x[eval(parse(text = rules[i])), ID] cxr[match, i] <- 1 } if (!is.null(prefix)) { colnames(cxr) <- paste0(prefix, seq_len(n_rules)) } if (verbosity > 0L) { msgdone() } cxr } # /rtemis::match_cases_by_rules #' Index cases by rules #' #' Get an index of which cases match which rule - meant for cases where each case matches one rule #' and one rule only #' #' @inheritParams match_cases_by_rules #' #' @author EDG #' #' @keywords internal #' @noRd index_cases_by_rules <- function(x, rules, verbosity = 1L) { cxr <- match_cases_by_rules(x, rules, verbosity) apply(cxr, 1, \(i) which(i == 1)) } #' Prune a rule to a maximum length #' #' @param rule Character: A rule. #' @param max_length Integer: The maximum number of conditions to keep. #' @param sep Character: The separator between conditions. #' #' @return Character: The pruned rule. #' @author EDG #' #' @keywords internal #' @noRd simple_prune_ <- function(rule, max_length, sep = " & ") { conditions <- strsplit(rule, sep)[[1]] if (length(conditions) > max_length) { conditions <- conditions[1:max_length] paste(conditions, collapse = sep) } else { rule } } # /rtemis::simple_prune_ #' Prune rules to a maximum length #' #' @param rule Character vector: Rules. #' @param max_length Integer: The maximum number of conditions to keep. #' @param sep Character: The separator between conditions. #' #' @return Character: The pruned rule. #' #' @author EDG #' #' @keywords internal #' @noRd simple_prune <- function(rules, max_length, sep = " & ") { rules <- sapply( rules, simple_prune_, max_length = max_length, sep = sep, USE.NAMES = FALSE ) rules } # /rtemis::simple_prune #' Extract variable names from rules #' #' @param rules Character vector: Rules. #' @param unique Logical: If TRUE, return only unique variables. #' #' @return Character vector: Variable names. #' #' @author EDG #' @keywords internal #' @noRd get_vars_from_rules <- function(rules, unique = FALSE) { # Extract variables from rules vars <- unique(unlist(strsplit(rules, " & "))) # Get string up to first "<", ">", "=", "!", or "%in%" vars <- gsub("(<|>|=|!|%in%).*", "", vars) vars <- gsub(" .*", "", vars) if (unique) { vars <- unique(vars) } vars } # /rtemis::get_vars_from_rules #' Format rules #' #' Converts R-executable logical expressions to a more human-friendly format #' #' @param x Vector, string: Logical expressions #' @param space_after_comma Logical: If TRUE, place spaces after commas. #' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many #' decimal places #' #' @return Character vector: Formatted rules. #' #' @author EDG #' @keywords internal #' @noRd format_rules <- function(x, space_after_comma = FALSE, decimal_places = NULL) { x <- gsub("[&+]", "AND", x) x <- gsub(">", " > ", x) x <- gsub("<=", " <= ", x) x <- gsub("%in%", "IN", x) x <- gsub("c\\(", "{", x) x <- gsub("\\)", "}", x) x <- gsub("'", "", x) if (space_after_comma) { x <- gsub(",", ", ", x) } if (!is.null(decimal_places)) { x <- gsubfn::gsubfn( "([0-9.]+[0-9])", function(i) ddSci(i, decimal_places = decimal_places), x, engine = "R" ) } x } # /rtemis::format_rules #' Format LightRuleFit rules #' #' Converts R-executable logical expressions to a more human-friendly format #' #' @param x Vector, string: Logical expressions #' @param space_after_comma Logical: If TRUE, place spaces after commas. #' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many #' decimal places #' #' @return Character vector: Formatted rules. #' #' @author EDG #' @keywords internal #' @noRd format_LightRuleFit_rules <- function( x, space_after_comma = FALSE, decimal_places = NULL ) { x <- gsub("[&+]", "AND", x) x <- gsub(">", " > ", x) x <- gsub("<=", " <= ", x) x <- gsub("%in%", "IN", x) x <- gsub("%notin%", "NOT IN", x) x <- gsub("c\\(", "{", x) x <- gsub("\\)", "}", x) x <- gsub("'", "", x) if (space_after_comma) { x <- gsub(",", ", ", x) } if (!is.null(decimal_places)) { x <- gsubfn::gsubfn( "([0-9.]+[0-9])", function(i) ddSci(i, decimal_places = decimal_places), x, engine = "R" ) } gsub(" ", " ", x) } # /rtemis::format_LightRuleFit_rules # rules2medmod # ::rtemis:: # 2018 EDG rtemis.org #' Convert rules from cutoffs to median/mode and range #' #' Convert rules from cutoffs to `median (range)` and `mode (range)` format #' #' @param rules Character, vector: Input rules #' @param x Data frame: Data to evaluate rules #' @param .ddSci Logical: If TRUE, format all continuous variables using #' [ddSci], which will give either 2 decimal places, or scientific #' notation if two decimal places result in 0.00 #' @param verbosity Integer: Verbosity level. #' #' @return Character vector. #' #' @author EDG #' @keywords internal #' @noRd rules2medmod <- function(rules, x, .ddSci = TRUE, verbosity = 1L) { cxr <- match_cases_by_rules(x, rules, verbosity = verbosity) nrules <- length(rules) rules_f <- vector("character", nrules) frmt <- if (.ddSci) ddSci else I if (verbosity > 0L) { msg("Converting rules...") } for (i in seq(rules)) { if (verbosity > 1L) { msg_info("#", i, "/", nrules, "...") } dat <- x[cxr[, i] == 1, ] sub <- strsplit(rules[i], "&")[[1]] rule <- character() for (j in seq(sub)) { categorical <- grepl("%in%", sub[j]) if (categorical) { var <- gsub("\\s", "", strsplit(sub[j], "%in%")[[1]][1]) vals <- dat[[var]] value <- paste0( get_mode(vals), " (", paste(levels(droplevels(vals)), collapse = ", "), ")" ) rule[j] <- paste0(var, " = ", value) } else { sub[j] <- gsub(">|>=|<|<=", "@", sub[j]) var <- gsub("\\s", "", strsplit(sub[j], "@")[[1]][1]) vals <- dat[[var]] value <- paste0( frmt(median(vals)), " (", frmt(min(vals)), ":", frmt(max(vals)), ")" ) rule[j] <- paste0(var, " = ", value) } } # /loop through each rule's conditions # This consolidates conditions like a > 3 & a > 5 to one rules_f[i] <- paste(unique(rule), collapse = " & ") } # /loop through rules if (verbosity > 0L) { msg("Done") } rules_f } # /rtemis::rules2medmod ================================================ FILE: R/utils_strings.R ================================================ # strng.R # ::rtemis:: # 2022 EDG rtemis.org # General hilite function output bold + any color. hilite <- function( ..., col = highlight_col, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) if (output_type == "ansi") { paste0("\033[1;38;5;", col, "m", paste(...), "\033[0m") } else if (output_type == "html") { paste0( "", paste(...), "" ) } else { paste0(...) } } # /rtemis::hilite #' @param x Numeric: Input #' #' @keywords internal #' @noRd highlightbig <- function(x, output_type = c("ansi", "html", "plain")) { highlight( format(x, scientific = FALSE, big.mark = ","), output_type = output_type ) } #' Red #' #' @author EDG #' @keywords internal #' @noRd red <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) { fmt( paste(...), col = rt_red, bold = bold, output_type = output_type ) } #' Green #' #' @author EDG #' @keywords internal #' @noRd green <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) { fmt( paste(...), col = rt_green, bold = bold, output_type = output_type ) } #' Blue #' #' @author EDG #' @keywords internal #' @noRd blue <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) { fmt( paste(...), col = rt_blue, bold = bold, output_type = output_type ) } #' Orange #' #' @author EDG #' @keywords internal #' @noRd orange <- function( ..., bold = FALSE, output_type = c("ansi", "html", "plain") ) { fmt( paste(...), col = rt_orange, bold = bold, output_type = output_type ) } #' Reset ANSI formatting #' #' @param ... Optional character: Text to be output to console. #' #' @return Character: Text with ANSI reset code prepended. #' #' @author EDG #' @keywords internal #' @noRd reset <- function(...) { paste0("\033[0m", paste(...)) } #' Get rtemis citation #' #' @return Character: Citation command. #' #' @author EDG #' @keywords internal #' @noRd rtcitation <- paste0( "> ", fmt("citation", col = rt_blue), "(", fmt("rtemis", col = rt_teal), ")" ) checkmark <- function( col = rt_green, output_type = c("ansi", "html", "plain") ) { fmt("\u2713", col = col, bold = TRUE, output_type = output_type) } crossmark <- function(output_type = c("ansi", "html", "plain")) { fmt("\u2715", col = rt_red, bold = TRUE, output_type = output_type) } #' Success message #' #' @param ... Character: Message components. #' @param sep Character: Separator between message components. #' @param end Character: End character. #' @param pad Integer: Number of spaces to pad the message with. #' #' @author EDG #' @keywords internal #' @noRd yay <- function(..., sep = " ", end = "\n", pad = 0) { message( strrep(" ", pad), paste(checkmark(), ..., sep = sep), end, appendLF = FALSE ) } # /rtemis::yay #' Failure message #' #' @param ... Character: Message components. #' @param sep Character: Separator between message components. #' @param end Character: End character. #' @param pad Integer: Number of spaces to pad the message with. #' #' @author EDG #' @keywords internal #' @noRd nay <- function(..., sep = " ", end = "\n", pad = 0) { message( strrep(" ", pad), paste(crossmark(), ..., sep = sep), end, appendLF = FALSE ) } # /rtemis::nay #' Format text for label printing #' #' @param x Character: Input #' @param underscores_to_spaces Logical: If TRUE, convert underscores to spaces. #' @param dotsToSpaces Logical: If TRUE, convert dots to spaces. #' @param toLower Logical: If TRUE, convert to lowercase (precedes `toTitleCase`). #' Default = FALSE (Good for getting all-caps words converted to title case, bad for abbreviations #' you want to keep all-caps) #' @param toTitleCase Logical: If TRUE, convert to Title Case. Default = TRUE (This does not change #' all-caps words, set `toLower` to TRUE if desired) #' @param capitalize_strings Character, vector: Always capitalize these strings, if present. Default = `"id"` #' @param stringsToSpaces Character, vector: Replace these strings with spaces. Escape as needed for `gsub`. #' Default = `"\\$"`, which formats common input of the type `data.frame$variable` #' #' @return Character vector. #' #' @author EDG #' @export #' #' @examples #' x <- c("county_name", "total.cost$", "age", "weight.kg") #' labelify(x) labelify <- function( x, underscores_to_spaces = TRUE, dotsToSpaces = TRUE, toLower = FALSE, toTitleCase = TRUE, capitalize_strings = c("id"), stringsToSpaces = c("\\$", "`") ) { if (is.null(x)) { return(NULL) } xf <- x for (i in stringsToSpaces) { xf <- gsub(i, " ", xf) } for (i in capitalize_strings) { xf <- gsub(paste0("^", i, "$"), toupper(i), xf, ignore.case = TRUE) } if (underscores_to_spaces) { xf <- gsub("_", " ", xf) } if (dotsToSpaces) { xf <- gsub("\\.", " ", xf) } if (toLower) { xf <- tolower(xf) } if (toTitleCase) { xf <- tools::toTitleCase(xf) } xf <- gsub(" {2,}", " ", xf) xf <- gsub(" $", "", xf) # Remove [[X]], where X is any length of characters or numbers gsub("\\[\\[.*\\]\\]", "", xf) } # /rtemis::labelify #' Clean names #' #' Clean character vector by replacing all symbols and sequences of symbols with single #' underscores, ensuring no name begins or ends with a symbol #' #' @param x Character vector. #' @param sep Character: Separator to replace symbols with. #' @param prefix_digits Character: prefix to add to names beginning with a #' digit. Set to NA to skip. #' #' @return Character vector. #' #' @author EDG #' @export #' #' @examples #' x <- c("Patient ID", "_Date-of-Birth", "SBP (mmHg)") #' x #' clean_names(x) #' clean_names(x, sep = " ") clean_names <- function(x, sep = "_", prefix_digits = "V_") { xc <- gsub("[^[:alnum:]]{1,}", sep, x) xc <- gsub(paste0("^", sep, "+|", sep, "+$"), "", xc) if (!is.na(prefix_digits)) { sn_idi <- grep("^[0-9]", xc) xc[sn_idi] <- paste0(prefix_digits, xc[sn_idi]) } xc } # /rtemis::clean_names #' Clean column names #' #' Clean column names by replacing all spaces and punctuation with a single underscore #' #' @param x Character vector OR any object with `colnames()` method, like matrix, data.frame, #' data.table, tibble, etc. #' @param lowercase Logical: If TRUE, convert to lowercase. #' @param uppercase Logical: If TRUE, convert to uppercase. #' @param titlecase Logical: If TRUE, convert to Title Case. #' #' @return Character vector with cleaned names. #' #' @author EDG #' @export #' #' @examples #' clean_colnames(iris, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE) clean_colnames <- function( x, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE ) { # Check arguments: only one of lowercase, uppercase, or titlecase can be TRUE if (sum(c(lowercase, uppercase, titlecase)) > 1) { cli::cli_abort( "Only one of {.arg lowercase}, {.arg uppercase}, or {.arg titlecase} can be TRUE." ) } if (!inherits(x, "character")) { x <- colnames(x) } if (lowercase) { clean_names(tolower(x)) } else if (uppercase) { clean_names(toupper(x)) } else if (titlecase) { gsub(" ", "_", tools::toTitleCase(clean_names(x, sep = " "))) } else { clean_names(x) } } # /rtemis::clean_colnames #' Force plain text when using `message()` #' #' @param x Character: Text to be output to console. #' #' @return Character: Text with ANSI escape codes removed. #' #' @author EDG #' @keywords internal #' @noRd plain <- function(x) { paste0("\033[0m", x) } #' Oxford comma #' #' @param ... Character vector: Items to be combined. #' @param format_fn Function: Any function to be applied to each item. #' #' @return Character: Formatted string with oxford comma. #' #' @author EDG #' @keywords internal #' @noRd oxfordcomma <- function(..., format_fn = identity) { x <- unlist(list(...)) if (length(x) > 2) { paste0( paste(sapply(x[-length(x)], format_fn), collapse = ", "), ", and ", format_fn(x[length(x)]) ) } else if (length(x) == 2) { paste(format_fn(x), collapse = " and ") } else { format_fn(x) } } # /rtemis::oxfordcomma #' Show S7 class name #' #' @param x Character: S7 class name. #' @param col Color: Color code for the object name. #' @param pad Integer: Number of spaces to pad the message with. #' @param prefix Character: Prefix to add to the object name. #' @param output_type Character {"ansi", "html", or "plain"}: Output type. #' #' @return Character: Formatted string that can be printed with cat(). #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' repr_S7name("Supervised") |> cat() repr_S7name <- function( x, col = col_object, bold = TRUE, underline = FALSE, pad = 0L, prefix = NULL, output_type = NULL ) { output_type <- get_output_type(output_type) if (S7_inherits(x)) { x <- S7_class(x)@name } paste0( strrep(" ", pad), fmt("<", col = highlight_col, output_type = output_type), if (!is.null(prefix)) { fmt( prefix, col = col_object, bold = bold, underline = underline, output_type = output_type ) }, fmt( x, col = col, bold = bold, underline = underline, output_type = output_type ), fmt(">", col = highlight_col, output_type = output_type), "\n" ) } # /rtemis::repr_S7name #' Cat object #' #' @param x Character: Object description #' @param col Character: Color code for the object name #' @param pad Integer: Number of spaces to pad the message with. #' @param verbosity Integer: Verbosity level. If > 1, adds package name to the output. #' @param type Character: Output type ("ansi", "html", "plain") #' #' @return NULL: Prints the formatted object description to the console. #' #' @author EDG #' @keywords internal #' @noRd objcat <- function( x, col = col_object, pad = 0L, prefix = NULL, output_type = c("ansi", "html", "plain") ) { output_type <- match.arg(output_type) out <- repr_S7name( x, col = col, pad = pad, prefix = prefix, output_type = output_type ) cat(out) } # /rtemis::objcat #' Function to label #' #' Create axis label from function definition and variable name #' #' @param fn Function. #' @param varname Character: Variable name. #' #' @return Character: Label. #' #' @author EDG #' @keywords internal #' @noRd fn2label <- function(fn, varname) { # Get function body fn_body <- deparse(fn)[2] # Replace "x" with variable name sub("\\(x\\)", paste0("(", varname, ")"), fn_body) } # /rtemis::fn2label #' Padded cat #' #' @param x Character: Text to be output to console. #' @param format_fn Function: Any function to be applied to `x`. #' @param col Color: Any color fn. #' @param newline_pre Logical: If TRUE, start with a new line. #' @param newline Logical: If TRUE, end with a new (empty) line. #' @param pad Integer: Pad message with this many spaces on the left. #' #' @author EDG #' @keywords internal #' @noRd padcat <- function( x, format_fn = I, col = NULL, newline_pre = FALSE, newline = FALSE, pad = 2L ) { x <- as.character(x) if (!is.null(format_fn)) { x <- format_fn(x) } if (newline_pre) { cat("\n") } cat(strrep(" ", pad)) if (!is.null(col)) { cat(col(x, TRUE)) } else { cat(bold(x)) } if (newline) { cat("\n") } } # /rtemis::padcat #' Pad string to target length #' #' @param x Character: String to pad. #' @param target Integer: Target length. #' @param char Character: Padding character. #' #' @return Character: Padded string. #' #' @author EDG #' @keywords internal #' @noRd pad_string <- function(x, target = 17L, char = " ") { lpad <- max(0, target - max(0, nchar(x))) paste0( strrep(char, lpad), x ) } # /rtemis::pad_string #' Pad left string to target length and print with right string #' #' @return Called for side effect: prints padded left string and right string. #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' { #' pcat("super", "wow") #' pcat(NULL, "oooo") #' } pcat <- function(left, right, target = 17, newline = TRUE) { cat(pad_string(left, target = target), right) if (newline) cat("\n") } #' Paste tables #' #' Collapses the contents of two tables element-wise with a separator #' Table names are kept if same, otherwise also collapsed with separator #' #' @param left table: Left table. #' @param right table: Right table. #' @param sep Character: Separator between tables' values. #' #' @return table: Table with collapsed values and names. #' #' @author EDG #' @keywords internal #' @noRd paste_tables <- function(left, right, sep = "=>") { # Check inputs are tables if (!inherits(left, "table") || !inherits(right, "table")) { cli::cli_abort(c( "x" = "Both {.arg left} and {.arg right} must be {.cls table} objects.", "i" = "Got: {.cls {class(left)}} and {.cls {class(right)}}." )) } # Check dimensions match if (!identical(dim(left), dim(right))) { cli::cli_abort(c( "x" = "Tables must have matching dimensions.", "i" = "Got dimensions: {dim(left)} and {dim(right)}." )) } # Paste values element-wise values <- paste(as.vector(left), as.vector(right), sep = sep) # Handle dimnames left_names <- dimnames(left) right_names <- dimnames(right) if (identical(left_names, right_names)) { # Keep names if identical result_names <- left_names } else { # Paste names element-wise if different result_names <- mapply( function(l, r) { if (identical(l, r)) l else paste(l, r, sep = sep) }, left_names, right_names, SIMPLIFY = FALSE ) } # Create result table result <- array(values, dim = dim(left), dimnames = result_names) class(result) <- "table" result } # /rtemis::paste_tables #' Paste data frames #' #' Collapses the contents of two data frames element-wise with a separator #' Column names and row names are kept if same, otherwise also collapsed with separator #' #' @param left data.frame: Left data frame. #' @param right data.frame: Right data frame. #' @param sep Character: Separator between data frames' values. #' #' @return data.frame: Data frame with collapsed values and names. #' #' @author EDG #' @keywords internal #' @noRd paste_dfs <- function(left, right, sep = "=>", decimal_places = 2L) { # Check inputs are data frames if (!inherits(left, "data.frame") || !inherits(right, "data.frame")) { cli::cli_abort(c( "x" = "Both {.arg left} and {.arg right} must be {.cls data.frame} objects.", "i" = "Got: {.cls {class(left)}} and {.cls {class(right)}}." )) } # Check dimensions match if (!identical(dim(left), dim(right))) { cli::cli_abort(c( "x" = "Data frames must have matching dimensions.", "i" = "Got dimensions: {dim(left)} and {dim(right)}." )) } # Paste values element-wise for each column result <- mapply( function(l, r) { paste( ddSci(l, decimal_places = decimal_places), ddSci(r, decimal_places = decimal_places), sep = sep ) }, left, right, SIMPLIFY = FALSE ) # Handle column names left_colnames <- colnames(left) right_colnames <- colnames(right) if (identical(left_colnames, right_colnames)) { result_colnames <- left_colnames } else { result_colnames <- paste(left_colnames, right_colnames, sep = sep) } # Handle row names left_rownames <- rownames(left) right_rownames <- rownames(right) if (identical(left_rownames, right_rownames)) { result_rownames <- left_rownames } else { result_rownames <- paste(left_rownames, right_rownames, sep = sep) } # Create result data frame result_df <- as.data.frame(result, stringsAsFactors = FALSE) colnames(result_df) <- result_colnames rownames(result_df) <- result_rownames result_df } # /rtemis::paste_dfs ================================================ FILE: R/utils_supervised.R ================================================ # super_ops.R # ::rtemis:: # 2024- EDG rtemis.org supervised_type <- function(dat) { if (is.factor(outcome(dat))) { "Classification" } else { "Regression" } } # /rtemis::supervised_type #' Convert probabilities to categorical (factor) #' #' @param x Numeric vector: Probabilities #' @param levels Character vector: Class labels #' @param binclasspos Integer: Index of the positive class for binary classification #' #' @return Factor #' @author EDG #' #' @keywords internal #' @noRd #' #' @examples #' # Binary classification where "A" is the positive class, so .1 => B, .5 & .9 => A #' prob2categorical(c(.1, .5, .9), c("A", "B"), 1) #' # Binary classification where "B" is the positive class, so .1 => A, .5 & .9 => B #' prob2categorical(c(.1, .5, .9), c("A", "B"), 2) #' # Multi-class classification #' prob <- matrix(c(.1, .3, .6, .05, .6, .35, .4, .3, .3), nrow = 3, byrow = TRUE) #' prob2categorical(prob, c("A", "B", "C")) prob2categorical <- function(x, levels, binclasspos = 2L) { n_classes <- length(levels) if (n_classes == 2) { # Binary classification stopifnot(binclasspos %in% c(1, 2)) if (binclasspos == 1L) { levels <- rev(levels) } fitted <- factor( ifelse(x >= .5, 1, 0), levels = c(0, 1), labels = levels ) } else { # Multi-class classification stopifnot(length(levels) == ncol(x)) fitted <- factor( apply(x, 1, which.max), levels = seq_len(n_classes), labels = levels ) } fitted } # /rtemis::prob2categorical #' @keywords internal #' @noRd check_supervised_inputs <- function(x, y = NULL) { if (is.null(y) && NCOL(x) < 2) { cli::cli_abort("y is missing") } } #' Move outcome to last column #' #' @param dat data.frame or similar. #' @param outcome_column Character: Name of outcome column. #' #' @return object of same class as `data` #' #' @author EDG #' @export #' #' @examples #' ir <- set_outcome(iris, "Sepal.Length") #' head(ir) set_outcome <- function(dat, outcome_column) { # Get index of outcome column id <- grep(outcome_column, names(dat)) # Check if (length(id) == 0) { cli::cli_abort('Column "{outcome_column}" not found in data.') } # Reorder columns # => Make S7 generic if (is.data.table(dat)) { dat[, c(setdiff(seq_len(NCOL(dat)), id), id), with = FALSE] } else { dat[, c(setdiff(seq_len(NCOL(dat)), id), id)] } } # /rtemis::set_outcome #' Make formula #' #' Makes a formula from a data.frame assuming the last column is the outcome #' #' @param x data.frame #' #' @return character #' @author EDG #' #' @keywords internal #' @noRd make_formula <- function(x, output = "character") { outcome <- names(x)[NCOL(x)] out <- paste(outcome, "~ .") if (output == "formula") { as.formula(out, env = parent.env(parent.frame())) } else { out } } # /rtemis::make_formula # glm2table.R # ::rtemis:: # 2021 EDG rtemis.org #' Collect summary table from list of massGLMs with same predictors, different outcome #' ("mass-y") #' #' @param x list of [glm] models #' @param xnames Character, vector: names of models #' @param include_anova Integer vector {1, 2, 3}: Output ANOVA Type I, II, and/or III #' p-vals. Type I uses base R `anova()` (sequential); Types II and III use `car::Anova()`. #' NA to skip. #' @param info Logical: If TRUE, warn when values < than machine eps are replaced by #' machine eps #' #' @return `data.table` with glm summaries #' @author EDG #' #' @keywords internal #' @noRd glm2table <- function(x, xnames = NULL, include_anova = NA, info = TRUE) { if (is.null(xnames)) { xnames <- if (!is.null(names(x))) { names(x) } else { paste0("Variable_", seq_along(x)) } } if (any(c(2L, 3L) %in% include_anova)) { check_dependencies("car") } out <- data.table( Variable = xnames, do.call( rbind, c(lapply(x, function(l) { out <- t(coef(summary(l))[-1, , drop = FALSE]) varnames <- gsub(".*\\$", "", colnames(out)) parnames <- c("Coefficient_", "SE_", "t_value_", "p_value_") out <- c(out) names(out) <- c(outer(parnames, varnames, paste0)) out })) ) ) # Convert p-vals equal to 0 to machine double eps # eps <- .Machine[["double.eps"]] # pvals_idc <- getnames(out, starts_with = "p_value") # # appease R CMD check:, use with = FALSE, not ..i # for (i in pvals_idc) { # lteps <- out[, i, with = FALSE] < eps # if (length(lteps) > 0) { # if (info) { # cli::cli_inform("Values < machine double eps converted to double eps") # } # out[, i, with = FALSE][lteps] <- eps # } # } term_labels <- x[[1]] |> terms() |> attr("term.labels") if (1 %in% include_anova) { pvals1 <- t(sapply( x, \(i) anova(i, test = "F")[seq_along(term_labels), 5] )) colnames(pvals1) <- paste( "p_value type I", term_labels ) out <- cbind(out, pvals1) } if (2 %in% include_anova) { pvals2 <- t(sapply( x, \(i) car::Anova(i, type = 2)[seq_along(term_labels), 3] )) colnames(pvals2) <- paste( "p_value type II", term_labels ) out <- cbind(out, pvals2) } if (3 %in% include_anova) { pvals3 <- t(sapply( x, \(i) car::Anova(i, type = 3)[seq_along(term_labels) + 1, 3] )) colnames(pvals3) <- paste( "p_value type III", term_labels ) out <- cbind(out, pvals3) } out } # /rtemis::glm2table #' Collect summary table (p-values) from list of massGAMs with same predictors, #' different outcome ("massy") #' #' @param mods list of [mgcv::gam] models. #' @param modnames Character, vector: names of models. #' #' @return `data.table` with GAM p-value summaries. #' @author EDG #' #' @keywords internal #' @noRd gam2table <- function(mods, modnames = NULL) { if (is.null(modnames)) { modnames <- if (!is.null(names(mods))) { names(mods) } else { paste0("Model_", seq_along(mods)) } } out <- data.table( Variable = modnames, do.call( rbind, c(lapply(mods, get_gam_pvals)) ) ) setnames(out, names(out)[-1], paste("p_value", names(out)[-1])) out } # /rtemis::gam2table #' Get GAM model's p-values for parametric and spline terms #' #' @keywords internal #' @noRd get_gam_pvals <- function(m, warn = TRUE) { eps <- .Machine[["double.eps"]] ms <- summary(m) pvals <- cbind( # s terms as.data.frame(t(ms[["s.table"]][, 4])), # p terms as.data.frame(t(ms[["p.table"]][, 4]))[-1] ) lteps <- pvals < eps if (any(lteps)) { if (warn) { warning("Values < machine double eps converted to double eps") } pvals[lteps] <- eps } pvals } # rtemis::get_gam_pvals #' Class Imbalance #' #' Calculate class imbalance as given by: #' \deqn{I = K\cdot\sum_{i=1}^K (n_i/N - 1/K)^2}{I = K * sum(n_i/N - 1/K)^2} #' where \eqn{K} is the number of classes, and \eqn{n_i} is the number of #' instances of class \eqn{i} #' #' @param x Vector, factor: Outcome. #' #' @return Numeric. #' #' @author EDG #' @export #' #' @examples #' # iris is perfectly balanced #' class_imbalance(iris[["Species"]]) #' # Simulate imbalanced outcome #' x <- factor(sample(c("A", "B"), size = 500L, replace = TRUE, prob = c(0.9, 0.1))) #' class_imbalance(x) class_imbalance <- function(x) { if (!is.factor(x)) { cli::cli_abort("Input must be a factor") } K <- nlevels(x) N <- length(x) freq <- as.data.frame(table(x)) K * sum(sapply(seq(K), function(i) (freq[["Freq"]][i] / N - 1 / K)^2)) } # /rtemis::class_imbalance # expand_grid.R # ::rtemis:: # 2025 EDG rtemis.org #' Expand Grid #' #' Expand grid, converting NULL values to "null" #' #' Since the "null" characters in the resulting data.frame cannot be replaced to NULL, #' they have to be converted back to NULL as needed downstream. #' So make sure your data does not have cheeky character vector with "null" values in it that are #' not actually NULLs. #' #' @param x named list #' #' @return data.frame #' #' @author EDG #' @keywords internal #' @noRd #' #' @examples #' x <- list(a = c(1, 2, 3), b = NULL, c = c("z", "v")) #' expand_grid(x) expand_grid <- function(x, stringsAsFactors = FALSE) { stopifnot(is.list(x)) # Convert all NULL to "null" x <- lapply(x, function(e) if (is.null(e)) "null" else e) # Expand grid expand.grid(x, stringsAsFactors = stringsAsFactors) } # /expand_grid ================================================ FILE: R/utils_uniprot.R ================================================ # uniprot_get.R # ::rtemis:: # 2022 E.D. Gennatas lambdamd.org #' Get protein sequence from UniProt #' #' @param accession Character: UniProt Accession number - e.g. "Q9UMX9" #' @param baseURL Character: UniProt rest API base URL. #' Default = "https://rest.uniprot.org/uniprotkb" #' @param verbosity Integer: Verbosity level. #' #' @return List with three elements: Identifier, Annotation, and Sequence. #' #' @author E.D. Gennatas #' @export #' #' @examples #' \dontrun{ #' # This gets the sequence from uniprot.org by default #' mapt <- uniprot_get("Q9UMX9") #' } uniprot_get <- function( accession, baseURL = "https://rest.uniprot.org/uniprotkb", verbosity = 1 ) { # Check types check_inherits(accession, "character") path <- paste0(baseURL, "/", accession, ".fasta") dat <- seqinr::read.fasta(path, seqtype = "AA") Annotation <- attr(dat[[1]], "Annot") Identifier <- gsub(" .*", "", Annotation) if (verbosity > 0L) { msg("Got:", highlight(Annotation)) } list( Identifier = Identifier, Annotation = Annotation, Sequence = as.character(dat[[1]]) ) } # /rtemis::uniprot_get ================================================ FILE: R/utils_xt.R ================================================ # xtdescribe.R # ::rtemis:: # 2024 EDG #' Describe longitudinal dataset #' #' This function emulates the `xtdescribe` function in Stata. #' #' @param x data.frame: Longitudinal data with ID and time variables. #' @param id_col Integer: The column position of the ID variable. #' @param time_col Integer: The column position of the time variable. #' @param n_patterns Integer: The number of patterns to display. #' #' @return data.frame: Summary of participation patterns, returned invisibly. #' #' @author EDG #' @export #' #' @examples #' # Load example longitudinal dataset #' data(xt_example) #' #' # Describe the longitudinal structure #' xtdescribe(xt_example) xtdescribe <- function( x, id_col = 1, time_col = 2, n_patterns = 9 ) { id_name <- names(x)[id_col] time_name <- names(x)[time_col] # Print vec_describe of ID, with n = number of unique IDs id_us <- sort(unique(x[[id_name]])) n_ids <- length(id_us) time_us <- sort(unique(x[[time_name]])) time_min <- min(time_us) time_max <- max(time_us) leftwidth <- max(nchar(id_name), nchar(time_name)) cat( pad_string(id_name, leftwidth), ": ", vec_describe(id_us), " n = ", length(id_us), "\n", sep = "" ) cat( pad_string(time_name, leftwidth), ": ", vec_describe(time_us), " T = ", length(time_us), "\n", sep = "" ) # Calculate delta for time variable # ?is this minimum delta? delta <- min(diff(time_us)) cat( strrep(" ", leftwidth), " Delta (", time_name, ") = ", delta, " unit\n", sep = "" ) span <- ((max(time_us) - min(time_us)) / delta) + 1 cat( strrep(" ", leftwidth), " Span (", time_name, ") = ", span, " periods\n", sep = "" ) # Does id * time have unique values? id_time_unique <- length(unique(interaction(x[[id_name]], x[[time_name]]))) == nrow(x) uid <- if (id_time_unique) { "uniquely identifies" } else { "does not uniquely identify" } cat( strrep(" ", leftwidth), " (", id_name, "*", time_name, " ", uid, " each observation)\n", sep = "" ) # Distribution of T_i at min, 5%, 25%, 50%, 75%, 95%, max cat( "\nDistribution of T_i:", "\tmin\t5%\t25%\t50%\t75%\t95%\tmax\n", sep = "" ) id_freq <- table(x[[id_name]]) id_freq_quant <- quantile(id_freq, c(0, 0.05, 0.25, 0.5, 0.75, 0.95, 1)) cat( strrep(" ", 20), "\t", paste(id_freq_quant, collapse = "\t"), "\n", sep = "" ) # Participation pattern by time # Get N IDs per time point, calculate pct of total IDs and rank id_time_freq <- table(x[[id_name]], x[[time_name]]) # Insert columns of 0s for missing time points # id_time_freq <- cbind(id_time_freq, matrix(0, nrow = nrow(id_time_freq), ncol = span - ncol(id_time_freq))) # Add column names for missing time points missing_time_points <- setdiff(seq(time_min, time_max, by = delta), time_us) missing <- matrix( 0, nrow = nrow(id_time_freq), ncol = length(missing_time_points) ) colnames(missing) <- missing_time_points id_time_freq <- cbind(id_time_freq, missing) # Re order columns by name id_time_freq <- id_time_freq[, order(as.numeric(colnames(id_time_freq)))] # Convert to pattern matrix by pasting all columns by row # id_time_freq_char <- as.matrix(id_time_freq) # id_time_freq_char[id_time_freq_char == 0] <- "." id_time_freq_char <- matrix( as.character(id_time_freq), nrow = nrow(id_time_freq) ) id_time_freq_char[id_time_freq_char == "0"] <- "." id_time_freq_pattern <- apply(id_time_freq_char, 1, paste, collapse = "") id_time_freq_pattern_freq <- table(id_time_freq_pattern) id_time_freq_pattern_sorted <- sort( id_time_freq_pattern_freq, decreasing = TRUE ) # Make data.frame with Frequency, Percent, Cumulative Percent of top n_patterns and rest pattern_summary <- data.frame( `Freq.` = as.numeric(id_time_freq_pattern_sorted)[seq_len(n_patterns)] ) pattern_summary[["Percent"]] <- round( (pattern_summary[["Freq."]] / n_ids) * 100, digits = 2 ) pattern_summary[["Cum."]] <- cumsum(pattern_summary[["Percent"]]) pattern_summary[["Pattern"]] <- names(id_time_freq_pattern_sorted)[seq_len( n_patterns )] # Add Freq, Percent, Cumulative Percent of rest pattern_summary <- rbind( pattern_summary, data.frame( `Freq.` = sum(id_time_freq_pattern_sorted[-seq_len(n_patterns)]), Percent = round( (sum(id_time_freq_pattern_sorted[-seq_len(n_patterns)]) / n_ids) * 100, digits = 2 ), `Cum.` = "100.00", Pattern = "(other patterns)" ) ) # Missing pattern is X for time points with data and . for time points with no data missing_pattern <- rep("X", ncol(id_time_freq)) missing_pattern[colSums(id_time_freq) == 0] <- "." missing_pattern <- paste(missing_pattern, collapse = "") # Add row with Total pattern_summary <- rbind( pattern_summary, data.frame( `Freq.` = n_ids, Percent = "100.00", `Cum.` = "", Pattern = missing_pattern ) ) print(pattern_summary, row.names = FALSE) invisible(pattern_summary) } # /rtemis::xtdescribe #' Describe vector #' #' Helper function to describe a vector by showing the first 2 and last value, separated by "..." #' #' @param x Vector to describe. #' @param sort_unique Logical: If TRUE, sort the unique values of the vector before describing. #' #' @return Character string describing the vector. #' #' @author EDG #' @keywords internal #' @noRd vec_describe <- function(x, sort_unique = FALSE) { # sort_unique defaults to FALSE since it needs to be computed already # within xtdescribe xs <- if (sort_unique) sort(unique(x)) else x paste(xs[1], xs[2], "...", xs[length(xs)], sep = ", ") } ================================================ FILE: R/zzz.R ================================================ # ▄▄▄▄ ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪ .▄▄ · # ▀▄ █·•██ ▀▄.▀··██ ▐███▪██ ▐█ ▀. # ▐▀▀▀▄ ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄ # ▐█• █ ▐█▌·▐█▄▄▌██ ██▌▐█▌▐█▌▐█▄▪▐█ # .▀ ▀ ▀▀▀ ▀▀▀ ▀▀ █▪▀▀▀▀▀▀ ▀▀▀▀ # zzz.R # ::rtemis:: # 2016- EDG rtemis.org # rtemis internal environment live <- new.env() live[["parallelized_learners"]] <- c( "LightGBM", "LightRF", "LightRuleFit", "Ranger" ) # msg() sink. NULL = console output (default). # When set to a function, msg()/msg0()/msgstart()/msgdone() route their # structured output through it instead of writing to the console. # Used by rtemislive to forward training messages over a WebSocket. # See `set_msg_sink()`. live[["msg_sink"]] <- NULL # vars rtemis_version <- packageVersion("rtemis") cores_available <- parallelly::availableCores() cores_to_use <- max(cores_available - 3L, 1L) # References # Unicode emojis: https://www.unicode.org/emoji/charts/full-emoji-list.html # Progress reporting setup_progress <- function() { progressr::handlers(global = TRUE) progressr::handlers( progressr::handler_cli( format = "{cli::pb_spin} [{pb_current}/{pb_total}] {pb_status}", format_done = "{cli::col_green(cli::symbol$tick)} Completed {pb_total} tasks", show_after = 0, clear = FALSE ) ) } .onLoad <- function(libname, pkgname) { # S7 S7::methods_register() # Set default options if not already set by user if (is.null(getOption("rtemis_theme"))) { options(rtemis_theme = "whitegrid") } if (is.null(getOption("rtemis_palette"))) { options(rtemis_palette = "rtms") } if (is.null(getOption("rtemis_font"))) { options(rtemis_font = "Helvetica") } # setup_progress() } .onAttach <- function(libname, pkgname) { if (interactive()) { # setup_progress() vline <- paste0( "\n .:", bold(pkgname), " v.", rtemis_version, " \U1F30A", " ", sessionInfo()[[2]], " (", cores_available, " cores available)\n " ) packageStartupMessage(paste0( pkglogo(), vline, fmt_gradient( paste0(rep("\u2500", nchar(vline) - 13L), collapse = ""), colors = c(rt_red, rt_orange, rt_red) ), bold("\n Defaults"), "\n \u2502 ", gray("Theme: "), getOption("rtemis_theme", "whitegrid"), "\n \u2502 ", gray("Font: "), getOption("rtemis_font", "Helvetica"), "\n \u2514 ", gray("Palette: "), getOption("rtemis_palette", "rtms"), bold("\n Resources"), "\n \u2502 ", gray("Docs:"), " https://docs.rtemis.org/r/ml", "\n \u2502 ", gray("Learn R:"), " https://pdsr.rtemis.org", "\n \u2514 ", gray("Cite: "), rtcitation, "\n\n ", fmt("PSA:", col = rt_red, bold = TRUE), " Do not throw data at algorithms. Compute responsibly!" )) } else { packageStartupMessage( paste0( " .:", pkgname, " ", rtemis_version, " \U1F30A", " ", sessionInfo()[[2]] ) ) } } ================================================ FILE: README.md ================================================ [![CRAN status](https://www.r-pkg.org/badges/version/rtemis)](https://CRAN.R-project.org/package=rtemis) [![r-universe](https://rtemis-org.r-universe.dev/badges/rtemis)](https://rtemis-org.r-universe.dev/rtemis) [![R-CMD-check](https://github.com/rtemis-org/rtemis/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/rtemis-org/rtemis/actions/workflows/R-CMD-check.yaml) # rtemis: Advanced Machine Learning & Visualization. [![rtemis cover](https://www.rtemis.org/rtemis-cover.avif)](https://docs.rtemis.org/r/ml/) This is the new version of the rtemis R package and remains under active development. The new version (0.99+) features: - Backend: complete rewrite of the supervised and unsupervised learning backend using the new [**S7** class system](https://github.com/RConsortium/S7), replacing all previous use of R6 and S3 classes. - API: **Functional user-facing API**, to maintain a consistent, user-friendly interface. - Extended use of **`setup_()`** functions, to offer increased transparency of configuration options. - Strict **type checking** and **condition validation** throughout to minimize user error and provide highly focused error messages & suggestions. - Expanded transparent messaging through each step. ![rtemis v1.0.0 console ascii](https://www.rtemis.org/rtemis-splash.webp) ## Installation ### From CRAN ```{r} pak::pak("rtemis") ``` or ```r install.packages("rtemis") ``` ### Latest version from GitHub ```r pak::pak("rtemis-org/rtemis") ``` ### Latest version from `r-universe` ```r pak::repo_add(myuniverse = "https://rtemis-org.r-universe.dev") pak::pak("rtemis") ``` or using `install.packages`: ```r install.packages( 'rtemis', repos = c('https://rtemis-org.r-universe.dev', 'https://cloud.r-project.org') ) ``` ## Installation of dependencies Every `rtemis` call that uses external packages includes a check for required dependencies and will print a message if any are missing. ## Transparent messaging It is essential to maintain transparency of operations at all times. `rtemis` functions often call multiple other functions, sometime recursively. The package uses a formatted messaging system to provide logging output which includes: - Timestamp - Message - Origin (function name) Most function include a `verbosity` argument to control the level of messaging output, with support for three levels: - `0`: silent - `1`: normal messaging - `2`: detailed messaging for debugging ## Text formatting `rtemis` includes an automatic text formatting system, which supports: - plain text output (for output to log files) - ANSI colored output (for R console) - HTML formatted output (for Quarto documents, shiny apps, etc.) ## `setup_` functions Machine learning workflows involve multiple steps, each with their own configuration options. It is essential that a) the user has complete control over each step, while maintaining an intuitive, user-friendly interface, and b) the user input is validated immediately and before a potentially long-running operation is started. The following `setup_` functions are available to configure each step of the workflow: - Supervised Learning: `setup_CART()`, `setup_GAM()`, etc. - Tuning: `setup_GridSearch()` - Clustering: `setup_CMeans()`, `setup_HardCL()`, etc. - Decomposition: `setup_NMF()`, `setup_ICA()`, etc. - Resampling: `setup_Resampler()` - Preprocessing: `setup_Preprocessor()` ## Supervised Learning The following will perform hyperparameter tuning and 10-fold cross-validation. It will train `(3*3*2*5 + 1) * 25 = 2275` models total (!). ```r mod <- train( dat, hyperparameters = setup_LightGBM( num_leaves = 2^(1:3), learning_rate = c(.001, .005, .01), subsample = c(.6, .9) ), outer_resampling_config = setup_Resampler( n_resamples = 25L, type = "StratSub" ) ) ``` ## Clustering ```r clust <- cluster( dat, config = setup_CMeans(k = 4L) ) ``` ## Decomposition ```r decomp <- decompose( dat, config = setup_ICA(k = 12L) ) ``` ## Changes from original implementation & Ongoing work ### Algorithms The original version included a long list of algorithms for supervised and unsupervised learning for testing and experimentation, many of which were rarely used. The initial release of the new version focuses on a smaller set of core algorithms, that will keep growing. ### Visualization The original version included the `mplot3` family of visualization functions using base R graphics and the `dplot3` family using `plotly`. The new release includes the `draw` family of functions, the evolution of the `dplot3` family. ## Documentation The documentation is available at [docs.rtemis.org/r/ml](https://docs.rtemis.org/r/ml), which includes walkthroughs of main features and full API reference. ## Ongoing work There is a lot more coming - both within this package and the other packages in the rtemis framework. ## rtemisalpha The original, unmaintained version of rtemis remains available as `rtemisalpha` at [rtemis-org/rtemis-legacy](https://github.com/rtemis-org/rtemis-legacy). --- © 2016–2026 E.D. Gennatas. Licensed under [GPL (>= 3)](https://www.gnu.org/licenses/gpl-3.0.html). ================================================ FILE: data-raw/create_xt_example.R ================================================ # Create synthetic longitudinal dataset for xtdescribe() example # ::rtemis:: # 2025 EDG set.seed(2025) # Create a small longitudinal dataset with various participation patterns # 10 participants measured at up to 5 time points (years 2020-2024) patient_id <- c( rep(1, 5), # Complete participation (all 5 time points) rep(2, 5), # Complete participation rep(3, 4), # Missing last time point rep(4, 4), # Missing first time point rep(5, 3), # Only first, middle, last rep(6, 2), # Only first two time points rep(7, 2), # Only last two time points rep(8, 3), # Missing 2nd and 4th time points rep(9, 1), # Only baseline rep(10, 1) # Only final time point ) year <- c( 2020:2024, # ID 1: all years 2020:2024, # ID 2: all years 2020:2023, # ID 3: missing 2024 2021:2024, # ID 4: missing 2020 c(2020, 2022, 2024), # ID 5: intermittent 2020:2021, # ID 6: early dropout 2023:2024, # ID 7: late entry c(2020, 2022, 2024), # ID 8: intermittent 2020, # ID 9: baseline only 2024 # ID 10: final only ) # Generate outcome variable (e.g., blood pressure) blood_pressure <- round(rnorm(length(patient_id), mean = 120, sd = 15), 1) # Generate treatment group treatment <- rep(c("A", "B"), length.out = length(patient_id)) # Create data.frame xt_example <- data.frame( patient_id = patient_id, year = year, blood_pressure = blood_pressure, treatment = treatment ) # Save to data/ usethis::use_data(xt_example, overwrite = TRUE) ================================================ FILE: inst/CITATION ================================================ utils::bibentry( header = "To cite rtemis in publications, please use:", # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # BibTeX: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - bibtype = "PhdThesis", title = "Towards Precision Psychiatry: Gray Matter Development and Cognition in Adolescence", author = "Efstathios D. Gennatas", year = "2017", school = "University of Pennsylvania", # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Plain text: # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - textVersion = "Gennatas, E. D. (2017). Towards Precision Psychiatry: Gray Matter Development and Cognition in Adolescence (Doctoral dissertation, University of Pennsylvania)." ) ================================================ FILE: inst/resources/rtemis.utf8 ================================================ ▄▄▄▄ ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪ .▄▄ · ▀▄ █·•██ ▀▄.▀··██ ▐███▪██ ▐█ ▀. ▐▀▀▀▄ ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄ ▐█• █ ▐█▌·▐█▄▄▌██ ██▌▐█▌▐█▌▐█▄▪▐█ .▀ ▀ ▀▀▀ ▀▀▀ ▀▀ █▪▀▀▀▀▀▀ ▀▀▀▀ ================================================ FILE: inst/resources/rtemis2.utf8 ================================================ ██▀███ ▄▄▄█████▓▓█████ ███▄ ▄███▓ ██▓ ██████ ▓██ ▒ ██▒▓ ██▒ ▓▒▓█ ▀ ▓██▒▀█▀ ██▒▓██▒▒██ ▒ ▓██ ░▄█ ▒▒ ▓██░ ▒░▒███ ▓██ █ ▓██░▒██▒░ ▓██▄ ▒██▀▀█▄ ░ ▓██▓ ░ ▒▓█ ▄ ▒██ ░ ▒██ ░██░ ▒ ██▒ ░██▓ ▒██▒ ▒██▒ ░ ░▒████▒▒██▒ ░██▒░██░▒██████▒▒ ░ ▒▓ ░▒▓░ ▒ ░░ ░░ ▒░ ░░ ▒░ ░ ░ ░░▓ ▒ ▒▓▒ ▒ ░ ░▒ ░ ▒░ ░ ░ ░ ░░ ░ ░ ▒ ░░ ░▒ ░ ░ ░░ ░ ░ ░ ░ ░ ▒ ░░ ░ ░ ░ ░ ░ ░ ░ ░ ================================================ FILE: man/available_algorithms.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/algorithmDB.R \name{available_supervised} \alias{available_supervised} \alias{available_algorithms} \alias{available_clustering} \alias{available_decomposition} \title{Available Algorithms} \usage{ available_supervised(verbosity = 1L) available_clustering(verbosity = 1L) available_decomposition(verbosity = 1L) } \arguments{ \item{verbosity}{Integer: Verbosity level.} } \value{ Named list of algorithm descriptions, invisibly. } \description{ Print available algorithms for supervised learning, clustering, and decomposition. } \examples{ available_supervised() available_clustering() available_decomposition() } \author{ EDG } ================================================ FILE: man/available_draw.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/algorithmDB.R \name{available_draw} \alias{available_draw} \title{Available Draw Functions} \usage{ available_draw(verbosity = 1L) } \arguments{ \item{verbosity}{Integer: Verbosity level.} } \value{ Named list of draw function descriptions, invisibly. } \description{ Print available draw functions for visualization. } \examples{ available_draw() } \author{ EDG } ================================================ FILE: man/available_themes.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/theme.R \name{available_themes} \alias{available_themes} \title{Print available \pkg{rtemis} themes} \usage{ available_themes() } \value{ Called for its side effect of printing available themes. } \description{ Print available \pkg{rtemis} themes } \examples{ available_themes() } \author{ EDG } ================================================ FILE: man/calibrate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{calibrate} \alias{calibrate} \title{Calibrate \code{Classification} & \code{ClassificationRes} Models} \usage{ calibrate( x, algorithm = "isotonic", hyperparameters = NULL, verbosity = 1L, ... ) } \arguments{ \item{x}{\code{Classification} or \code{ClassificationRes} object to calibrate.} \item{algorithm}{Character: Algorithm to use to train calibration model.} \item{hyperparameters}{\code{Hyperparameters} object: Setup using one of \verb{setup_*} functions.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Additional arguments passed to specific methods.} } \value{ Calibrated model object. } \description{ Generic function to calibrate binary classification models. } \details{ The goal of calibration is to adjust the predicted probabilities of a binary classification model so that they better reflect the true probabilities (i.e. empirical risk) of the positive class. } \section{Method-specific parameters}{ \strong{For \code{Classification} objects:} \itemize{ \item \code{predicted_probabilities}: Numeric vector of predicted probabilities \item \code{true_labels}: Factor of true class labels } \strong{For \code{ClassificationRes} objects:} \itemize{ \item \code{resampler_config}: \code{ResamplerConfig} object for calibration training \item \code{train_verbosity}: Integer controlling calibration model training output } } \examples{ # --- Calibrate Classification --- dat <- iris[51:150, ] res <- resample(dat) dat$Species <- factor(dat$Species) dat_train <- dat[res[[1]], ] dat_test <- dat[-res[[1]], ] # Train GLM on a training/test split mod_c_glm <- train( x = dat_train, dat_test = dat_test, algorithm = "glm" ) # Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`, # in this case using the training data, but it could be a separate calibration dataset. mod_c_glm_cal <- calibrate( mod_c_glm, predicted_probabilities = mod_c_glm$predicted_prob_training, true_labels = mod_c_glm$y_training ) mod_c_glm_cal # --- Calibrate ClassificationRes --- # Train GLM with cross-validation resmod_c_glm <- train( x = dat, algorithm = "glm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) # Calibrate the `ClassificationRes` using the same resampling configuration as used for training. resmod_c_glm_cal <- calibrate(resmod_c_glm) resmod_c_glm_cal } \author{ EDG } ================================================ FILE: man/check_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/check_data.R \name{check_data} \alias{check_data} \title{Check Data} \usage{ check_data( x, name = NULL, get_duplicates = TRUE, get_na_case_pct = FALSE, get_na_feature_pct = FALSE ) } \arguments{ \item{x}{tabular data: Input to be checked.} \item{name}{Character: Name of dataset.} \item{get_duplicates}{Logical: If TRUE, check for duplicate cases.} \item{get_na_case_pct}{Logical: If TRUE, calculate percent of NA values per case.} \item{get_na_feature_pct}{Logical: If TRUE, calculate percent of NA values per feature.} } \value{ \code{CheckData} object. } \description{ Check Data } \examples{ n <- 1000 x <- rnormmat(n, 50, return_df = TRUE) x$char1 <- sample(letters, n, TRUE) x$char2 <- sample(letters, n, TRUE) x$fct <- factor(sample(letters, n, TRUE)) x <- rbind(x, x[1, ]) x$const <- 99L x[sample(nrow(x), 20), 3] <- NA x[sample(nrow(x), 20), 10] <- NA x$fct[30:35] <- NA check_data(x) } \author{ EDG } ================================================ FILE: man/choose_theme.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/theme.R \name{choose_theme} \alias{choose_theme} \title{Select an rtemis theme} \usage{ choose_theme( x = c("white", "whitegrid", "whiteigrid", "black", "blackgrid", "blackigrid", "darkgray", "darkgraygrid", "darkgrayigrid", "lightgraygrid", "mediumgraygrid"), override = NULL ) } \arguments{ \item{x}{Character: Name of theme to select. If not defined, will use \code{getOption("rtemis_theme", "whitegrid")}.} \item{override}{Optional List: Theme parameters to override defaults.} } \value{ \code{Theme} object. } \description{ Select an rtemis theme } \details{ If \code{x} is not defined, \code{choose_theme()} will use \code{getOption("rtemis_theme", "whitegrid")} to select the theme. This allows users to set a default theme for all rtemis plots by setting \code{options(rtemis_theme = "theme_name")} at any point. } \examples{ # Get default theme set by options(rtemis_theme = "theme_name"). # If not set, defaults to "whitegrid": choose_theme() # Get darkgraygrid theme. Same as `theme_darkgraygrid()`: choose_theme("darkgraygrid") # This will use the default theme, and override the foreground color to red: choose_theme(override = list(fg = "#ff0000")) } \author{ EDG } ================================================ FILE: man/class_imbalance.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_supervised.R \name{class_imbalance} \alias{class_imbalance} \title{Class Imbalance} \usage{ class_imbalance(x) } \arguments{ \item{x}{Vector, factor: Outcome.} } \value{ Numeric. } \description{ Calculate class imbalance as given by: \deqn{I = K\cdot\sum_{i=1}^K (n_i/N - 1/K)^2}{I = K * sum(n_i/N - 1/K)^2} where \eqn{K} is the number of classes, and \eqn{n_i} is the number of instances of class \eqn{i} } \examples{ # iris is perfectly balanced class_imbalance(iris[["Species"]]) # Simulate imbalanced outcome x <- factor(sample(c("A", "B"), size = 500L, replace = TRUE, prob = c(0.9, 0.1))) class_imbalance(x) } \author{ EDG } ================================================ FILE: man/classification_metrics.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/metrics.R \name{classification_metrics} \alias{classification_metrics} \title{Classification Metrics} \usage{ classification_metrics( true_labels, predicted_labels, predicted_prob = NULL, binclasspos = 2L, calc_auc = TRUE, calc_brier = TRUE, auc_method = "lightAUC", sample = character(), verbosity = 0L ) } \arguments{ \item{true_labels}{Factor: True labels.} \item{predicted_labels}{Factor: predicted values.} \item{predicted_prob}{Numeric vector: predicted probabilities.} \item{binclasspos}{Integer: Factor level position of the positive class in binary classification.} \item{calc_auc}{Logical: If TRUE, calculate AUC. May be slow in very large datasets.} \item{calc_brier}{Logical: If TRUE, calculate Brier_Score.} \item{auc_method}{Character: "lightAUC", "pROC", "ROCR".} \item{sample}{Character: Sample name.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{ClassificationMetrics} object. } \description{ Classification Metrics } \details{ Note that auc_method = "pROC" is the only one that will output an AUC even if one or more predicted probabilities are NA. } \examples{ # Assume positive class is "b" true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a")) predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) classification_metrics(true_labels, predicted_labels, predicted_prob) classification_metrics(true_labels, predicted_labels, 1 - predicted_prob, binclasspos = 1L) } \author{ EDG } ================================================ FILE: man/clean_colnames.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_strings.R \name{clean_colnames} \alias{clean_colnames} \title{Clean column names} \usage{ clean_colnames(x, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE) } \arguments{ \item{x}{Character vector OR any object with \code{colnames()} method, like matrix, data.frame, data.table, tibble, etc.} \item{lowercase}{Logical: If TRUE, convert to lowercase.} \item{uppercase}{Logical: If TRUE, convert to uppercase.} \item{titlecase}{Logical: If TRUE, convert to Title Case.} } \value{ Character vector with cleaned names. } \description{ Clean column names by replacing all spaces and punctuation with a single underscore } \examples{ clean_colnames(iris, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE) } \author{ EDG } ================================================ FILE: man/clean_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_strings.R \name{clean_names} \alias{clean_names} \title{Clean names} \usage{ clean_names(x, sep = "_", prefix_digits = "V_") } \arguments{ \item{x}{Character vector.} \item{sep}{Character: Separator to replace symbols with.} \item{prefix_digits}{Character: prefix to add to names beginning with a digit. Set to NA to skip.} } \value{ Character vector. } \description{ Clean character vector by replacing all symbols and sequences of symbols with single underscores, ensuring no name begins or ends with a symbol } \examples{ x <- c("Patient ID", "_Date-of-Birth", "SBP (mmHg)") x clean_names(x) clean_names(x, sep = " ") } \author{ EDG } ================================================ FILE: man/cluster.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster.R \name{cluster} \alias{cluster} \title{Perform Clustering} \usage{ cluster(x, algorithm = "KMeans", config = NULL, verbosity = 1L) } \arguments{ \item{x}{Matrix or data.frame: Data to cluster. Rows are cases to be clustered.} \item{algorithm}{Character: Clustering algorithm.} \item{config}{List: Algorithm-specific config.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{Clustering} object. } \description{ Perform clustering on the rows (usually cases) of a dataset. } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. } \examples{ iris_km <- cluster(exc(iris, "Species"), algorithm = "KMeans") } \author{ EDG } ================================================ FILE: man/col2grayscale.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_color.R \name{col2grayscale} \alias{col2grayscale} \title{Color to Grayscale} \usage{ col2grayscale(x, what = c("color", "decimal")) } \arguments{ \item{x}{Color to convert to grayscale} \item{what}{Character: "color" returns a hexadecimal color, "decimal" returns a decimal between 0 and 1} } \value{ Character: color hex code. } \description{ Convert a color to grayscale } \details{ Uses the NTSC grayscale conversion: 0.299 * R + 0.587 * G + 0.114 * B } \examples{ col2grayscale("red") col2grayscale("red", "dec") } \author{ EDG } ================================================ FILE: man/color_adjust.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_color.R \name{color_adjust} \alias{color_adjust} \title{Adjust HSV Color} \usage{ color_adjust(color, alpha = NULL, hue = 0, sat = 0, val = 0) } \arguments{ \item{color}{Input color. Any format that grDevices::col2rgb() recognizes} \item{alpha}{Numeric: Scale alpha by this amount. Future: replace with absolute setting} \item{hue}{Float: How much hue to add to \code{color}} \item{sat}{Float: How much saturation to add to \code{color}} \item{val}{Float: How much to increase value of \code{color} by} } \value{ Adjusted color } \description{ Modify alpha, hue, saturation and value (HSV) of a color } \examples{ previewcolor(c(teal = "#00ffff", teal50 = color_adjust("#00ffff", alpha = 0.5))) } \author{ EDG } ================================================ FILE: man/ddSci.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ddSci.R \name{ddSci} \alias{ddSci} \title{Format Numbers for Printing} \usage{ ddSci(x, decimal_places = 2, hi = 1e+06, as_numeric = FALSE) } \arguments{ \item{x}{Vector of numbers} \item{decimal_places}{Integer: Return this many decimal places.} \item{hi}{Float: Threshold at or above which scientific notation is used.} \item{as_numeric}{Logical: If TRUE, convert to numeric before returning. This will not force all numbers to print 2 decimal places. For example: 1.2035 becomes "1.20" if \code{as_numeric = FALSE}, but 1.2 otherwise This can be helpful if you want to be able to use the output as numbers / not just for printing.} } \value{ Formatted number } \description{ 2 Decimal places, otherwise scientific notation } \details{ Numbers will be formatted to 2 decimal places, unless this results in 0.00 (e.g. if input was .0032), in which case they will be converted to scientific notation with 2 significant figures. \code{ddSci} will return \code{0.00} if the input is exactly zero. This function can be used to format numbers in plots, on the console, in logs, etc. } \examples{ x <- .34876549 ddSci(x) # "0.35" x <- .00000000457823 ddSci(x) # "4.6e-09" } \author{ EDG } ================================================ FILE: man/ddb_collect.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ddb.R \name{ddb_collect} \alias{ddb_collect} \title{Collect a lazy-read duckdb table} \usage{ ddb_collect(sql, progress = TRUE, returnobj = c("data.frame", "data.table")) } \arguments{ \item{sql}{Character: DuckDB SQL query, usually output of \link{ddb_data} with \code{collect = FALSE}} \item{progress}{Logical: If TRUE, show progress bar} \item{returnobj}{Character: data.frame or data.table: class of object to return} } \value{ \code{data.frame} or \code{data.table}. } \description{ Collect a table read with \code{ddb_data(x, collect = FALSE)} } \examples{ \dontrun{ # Requires local CSV file; replace with your own path sql <- ddb_data("/Data/iris.csv", collect = FALSE) ir <- ddb_collect(sql) } } \author{ EDG } ================================================ FILE: man/ddb_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ddb.R \name{ddb_data} \alias{ddb_data} \title{Read CSV using DuckDB} \usage{ ddb_data( filename, datadir = NULL, sep = ",", header = TRUE, quotechar = "", ignore_errors = TRUE, make_unique = TRUE, select_columns = NULL, filter_column = NULL, filter_vals = NULL, character2factor = FALSE, collect = TRUE, progress = TRUE, returnobj = c("data.table", "data.frame"), data.table.key = NULL, clean_colnames = TRUE, verbosity = 1L ) } \arguments{ \item{filename}{Character: file name; either full path or just the file name, if \code{datadir} is also provided.} \item{datadir}{Character: Optional path if \code{filename} is not full path.} \item{sep}{Character: Field delimiter/separator.} \item{header}{Logical: If TRUE, first line will be read as column names.} \item{quotechar}{Character: Quote character.} \item{ignore_errors}{Logical: If TRUE, ignore parsing errors (sometimes it's either this or no data, so).} \item{make_unique}{Logical: If TRUE, keep only unique rows.} \item{select_columns}{Character vector: Column names to select.} \item{filter_column}{Character: Name of column to filter on, e.g. "ID".} \item{filter_vals}{Numeric or Character vector: Values in \code{filter_column} to keep. \code{filter_column} to keep.} \item{character2factor}{Logical: If TRUE, convert character columns to factors.} \item{collect}{Logical: If TRUE, collect data and return structure class as defined by \code{returnobj}.} \item{progress}{Logical: If TRUE, print progress (no indication this works).} \item{returnobj}{Character: "data.frame" or "data.table" object class to return. If "data.table", data.frame object returned from \code{DBI::dbGetQuery} is passed to \code{data.table::setDT}; will add to execution time if very large, but then that's when you need a data.table.} \item{data.table.key}{Character: If set, this corresponds to a column name in the dataset. This column will be set as key in the data.table output.} \item{clean_colnames}{Logical: If TRUE, clean colnames with \link{clean_colnames}.} \item{verbosity}{Integer: Verbosity level.} } \value{ data.frame or data.table if \code{collect} is TRUE, otherwise a character with the SQL query } \description{ Lazy-read a CSV file, optionally: filter rows, remove duplicates, clean column names, convert character to factor, collect. } \examples{ \dontrun{ # Requires local CSV file; replace with your own path ir <- ddb_data("/Data/massive_dataset.csv", filter_column = "ID", filter_vals = 8001:9999 ) } } \author{ EDG } ================================================ FILE: man/decomp.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/decomp.R \name{decomp} \alias{decomp} \title{Perform Data Decomposition} \usage{ decomp(x, algorithm = "ICA", config = NULL, verbosity = 1L) } \arguments{ \item{x}{Matrix or data frame: Input data.} \item{algorithm}{Character: Decomposition algorithm.} \item{config}{DecompositionConfig: Algorithm-specific config.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{Decomposition} object. } \description{ Perform linear or non-linear decomposition of numeric data. } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. } \examples{ iris_pca <- decomp(exc(iris, "Species"), algorithm = "PCA") } \author{ EDG } ================================================ FILE: man/describe.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{describe} \alias{describe} \title{Describe object} \usage{ describe(x, ...) } \arguments{ \item{x}{R object to describe. See method documentation for supported classes.} \item{...}{Additional arguments passed to methods. See details.} } \description{ Describe object } \details{ Extra arguments for \code{factor} method: \itemize{ \item \code{max_n}: Integer: Return counts for up to this many levels. \item \code{return_ordered}: Logical: If TRUE, return levels ordered by count, otherwise return in level order. \item \code{verbosity}: Integer: Verbosity level. } } \examples{ # --- For `Supervised` objects --- species_lightrf <- train(iris, algorithm = "lightrf") describe(species_lightrf) # --- For `SupervisedRes` objects --- mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler()) describe(mod) # --- For factors --- # Small number of levels describe(iris[["Species"]]) # Large number of levels: show top n by count x <- factor(sample(letters, 1000, TRUE)) describe(x) describe(x, 3) describe(x, 3, return_ordered = FALSE) } \author{ EDG } ================================================ FILE: man/df_movecolumn.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_df.R \name{df_movecolumn} \alias{df_movecolumn} \title{Move data frame column} \usage{ df_movecolumn(x, colname, to = ncol(x)) } \arguments{ \item{x}{data.frame.} \item{colname}{Character: Name of column you want to move.} \item{to}{Integer: Which column position to move the vector to. Default = \code{ncol(x)} i.e. the last column.} } \value{ data.frame } \description{ Move data frame column } \examples{ ir <- df_movecolumn(iris, colname = "Species", to = 1L) } \author{ EDG } ================================================ FILE: man/df_nunique_perfeat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_df.R \name{df_nunique_perfeat} \alias{df_nunique_perfeat} \title{Unique values per feature} \usage{ df_nunique_perfeat(x, excludeNA = FALSE) } \arguments{ \item{x}{matrix or data frame input} \item{excludeNA}{Logical: If TRUE, exclude NA values from unique count.} } \value{ Vector, integer of length \code{NCOL(x)} with number of unique values per column/feature } \description{ Get number of unique values per features } \examples{ df_nunique_perfeat(iris) } \author{ EDG } ================================================ FILE: man/dot-list_to_Hyperparameters.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{.list_to_Hyperparameters} \alias{.list_to_Hyperparameters} \title{Convert a list to a Hyperparameters object} \usage{ .list_to_Hyperparameters(x) } \arguments{ \item{x}{Named list with two elements: \describe{ \item{\code{algorithm}}{Character: algorithm name, e.g. \code{"GLM"}, \code{"RF"}.} \item{\code{hyperparameters}}{Named list of hyperparameter name-value pairs passed to the corresponding \verb{setup_()} function.} }} } \value{ A \code{Hyperparameters} object as returned by \verb{setup_()}. } \description{ Internal function used by \code{rtemis.server} to reconstruct a \code{Hyperparameters} object from a wire-format list. Not intended for direct use by end users. } \author{ EDG } \keyword{internal} ================================================ FILE: man/dot-list_to_ResamplerConfig.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/05_Resampler.R \name{.list_to_ResamplerConfig} \alias{.list_to_ResamplerConfig} \title{Convert a list to a ResamplerConfig object} \usage{ .list_to_ResamplerConfig(x) } \arguments{ \item{x}{Named list with the following elements: \describe{ \item{\code{type}}{Character: resampler type — one of \code{"KFold"}, \code{"StratSub"}, \code{"StratBoot"}, \code{"Bootstrap"}, \code{"LOOCV"}, \code{"Custom"}.} \item{\code{n}}{Integer: number of resamples (not used for \code{"LOOCV"}).} \item{\code{train_p}}{Numeric: training proportion (used by \code{"StratSub"} and \code{"StratBoot"}).} \item{\code{stratify_var}}{Character or \code{NULL}: stratification variable name.} \item{\code{strat_n_bins}}{Integer: number of bins for stratification.} \item{\code{target_length}}{Integer or \code{NULL}: target resample length (\code{"StratBoot"} only).} \item{\code{id_strat}}{Character or \code{NULL}: ID stratification variable.} \item{\code{seed}}{Integer or \code{NULL}: random seed.} }} } \value{ A \code{ResamplerConfig} object of the appropriate subtype. } \description{ Internal function used by \code{rtemis.server} and \code{SuperConfig} deserialization to reconstruct a \code{ResamplerConfig} object from a named list. Not intended for direct use by end users. } \author{ EDG } \keyword{internal} ================================================ FILE: man/dot-list_to_TunerConfig.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/06_Tuner.R \name{.list_to_TunerConfig} \alias{.list_to_TunerConfig} \title{Convert a list to a TunerConfig object} \usage{ .list_to_TunerConfig(x) } \arguments{ \item{x}{Named list with two elements: \describe{ \item{\code{type}}{Character: tuner type. Currently only \code{"GridSearch"} is supported.} \item{\code{config}}{Named list of tuner configuration fields. For \code{"GridSearch"}: \code{resampler_config} (a list accepted by \code{\link[=.list_to_ResamplerConfig]{.list_to_ResamplerConfig()}}), \code{search_type}, \code{randomize_p}, \code{metrics_aggregate_fn}, \code{metric}, and \code{maximize}.} }} } \value{ A \code{TunerConfig} object (currently a \code{GridSearchConfig}). } \description{ Internal function used by \code{rtemis.server} and \code{SuperConfig} deserialization to reconstruct a \code{TunerConfig} object from a named list. Not intended for direct use by end users. } \author{ EDG } \keyword{internal} ================================================ FILE: man/draw_3Dscatter.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_3Dscatter.R \name{draw_3Dscatter} \alias{draw_3Dscatter} \title{Interactive 3D Scatter Plots} \usage{ draw_3Dscatter( x, y = NULL, z = NULL, fit = NULL, cluster = NULL, cluster_config = NULL, group = NULL, formula = NULL, rsq = TRUE, mode = "markers", order_on_x = NULL, main = NULL, xlab = NULL, ylab = NULL, zlab = NULL, alpha = 0.8, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), axes_square = FALSE, group_names = NULL, font_size = 16, marker_col = NULL, marker_size = 8, fit_col = NULL, fit_alpha = 0.7, fit_lwd = 2.5, tick_font_size = 12, spike_col = NULL, legend = NULL, legend_xy = c(0, 1), legend_xanchor = "left", legend_yanchor = "auto", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, margin = list(t = 30, b = 0, l = 0, r = 0), fit_params = NULL, width = NULL, height = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", verbosity = 0L, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{Numeric, vector/data.frame/list: x-axis data.} \item{y}{Numeric, vector/data.frame/list: y-axis data.} \item{z}{Numeric, vector/data.frame/list: z-axis data.} \item{fit}{Character: Fit method.} \item{cluster}{Character: Clustering method.} \item{cluster_config}{List: Config for clustering.} \item{group}{Factor: Grouping variable.} \item{formula}{Formula: Formula for non-linear least squares fit.} \item{rsq}{Logical: If TRUE, print R-squared values in legend if \code{fit} is set.} \item{mode}{Character, vector: "markers", "lines", "markers+lines".} \item{order_on_x}{Logical: If TRUE, order \code{x} and \code{y} on \code{x}.} \item{main}{Character: Main title.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{zlab}{Character: z-axis label.} \item{alpha}{Numeric: Alpha for markers.} \item{bg}{Background color.} \item{plot_bg}{Plot background color.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{axes_square}{Logical: If TRUE, draw a square plot.} \item{group_names}{Character: Names for groups.} \item{font_size}{Numeric: Font size.} \item{marker_col}{Color for markers.} \item{marker_size}{Numeric: Marker size.} \item{fit_col}{Color for fit line.} \item{fit_alpha}{Numeric: Alpha for fit line.} \item{fit_lwd}{Numeric: Line width for fit line.} \item{tick_font_size}{Numeric: Tick font size.} \item{spike_col}{Spike lines color.} \item{legend}{Logical: If TRUE, draw legend.} \item{legend_xy}{Numeric: Position of legend.} \item{legend_xanchor}{Character: X anchor for legend.} \item{legend_yanchor}{Character: Y anchor for legend.} \item{legend_orientation}{Character: Orientation of legend.} \item{legend_col}{Color for legend text.} \item{legend_bg}{Color for legend background.} \item{legend_border_col}{Color for legend border.} \item{legend_borderwidth}{Numeric: Border width for legend.} \item{legend_group_gap}{Numeric: Gap between legend groups.} \item{margin}{Numeric, named list: Margins for top, bottom, left, right.} \item{fit_params}{\code{Hyperparameters} for fit.} \item{width}{Numeric: Width of plot.} \item{height}{Numeric: Height of plot.} \item{padding}{Numeric: Graph padding.} \item{displayModeBar}{Logical: If TRUE, display mode bar.} \item{modeBar_file_format}{Character: File format for mode bar.} \item{verbosity}{Integer: Verbosity level.} \item{filename}{Character: Filename to save plot.} \item{file_width}{Numeric: Width of saved file.} \item{file_height}{Numeric: Height of saved file.} \item{file_scale}{Numeric: Scale of saved file.} } \value{ A \code{plotly} object. } \description{ Draw interactive 3D scatter plots using \code{plotly}. } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. Note that \code{draw_3Dscatter} uses the theme's \code{plot_bg} as \code{grid_col}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} draw_3Dscatter(iris, group = iris$Species, theme = theme_darkgraygrid()) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_bar.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_bar.R \name{draw_bar} \alias{draw_bar} \title{Interactive Barplots} \usage{ draw_bar( x, main = NULL, xlab = NULL, ylab = NULL, alpha = 1, horizontal = FALSE, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), barmode = c("group", "relative", "stack", "overlay"), group_names = NULL, order_by_val = FALSE, ylim = NULL, hovernames = NULL, feature_names = NULL, font_size = 16, annotate = FALSE, annotate_col = theme[["labs_col"]], legend = NULL, legend_col = NULL, legend_xy = c(1, 1), legend_orientation = "v", legend_xanchor = "left", legend_yanchor = "auto", hline = NULL, hline_col = NULL, hline_width = 1, hline_dash = "solid", hline_annotate = NULL, hline_annotation_x = 1, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), automargin_x = TRUE, automargin_y = TRUE, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 0L ) } \arguments{ \item{x}{vector (possibly named), matrix, or data.frame: If matrix or data.frame, rows are groups (can be 1 row), columns are features} \item{main}{Character: Main plot title.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{alpha}{Float (0, 1]: Transparency for bar colors.} \item{horizontal}{Logical: If TRUE, plot bars horizontally} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{barmode}{Character: Type of bar plot to make: "group", "relative", "stack", "overlay". Default = "group". Use "relative" for stacked bars, wich handles negative values correctly, unlike "stack", as of writing.} \item{group_names}{Character, vector, length = NROW(x): Group names. Default = NULL, which uses \code{rownames(x)}} \item{order_by_val}{Logical: If TRUE, order bars by increasing value. Only use for single group data.} \item{ylim}{Float, vector, length 2: y-axis limits.} \item{hovernames}{Character, vector: Optional character vector to show on hover over each bar.} \item{feature_names}{Character, vector, length = NCOL(x): Feature names. Default = NULL, which uses \code{colnames(x)}} \item{font_size}{Float: Font size for all labels.} \item{annotate}{Logical: If TRUE, annotate stacked bars} \item{annotate_col}{Color for annotations} \item{legend}{Logical: If TRUE, draw legend. Default = NULL, and will be turned on if there is more than one feature present} \item{legend_col}{Color: Legend text color. Default = NULL, determined by theme} \item{legend_xy}{Numeric, vector, length 2: x and y for plotly's legend} \item{legend_orientation}{"v" or "h" for vertical or horizontal} \item{legend_xanchor}{Character: Legend's x anchor: "left", "center", "right", "auto"} \item{legend_yanchor}{Character: Legend's y anchor: "top", "middle", "bottom", "auto"} \item{hline}{Float: If defined, draw a horizontal line at this y value.} \item{hline_col}{Color for \code{hline}.} \item{hline_width}{Float: Width for \code{hline}.} \item{hline_dash}{Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot"} \item{hline_annotate}{Character: Text of horizontal line annotation if \code{hline} is set} \item{hline_annotation_x}{Numeric: x position to place annotation with paper as reference. 0: to the left of the plot area; 1: to the right of the plot area} \item{margin}{Named list: plot margins.} \item{automargin_x}{Logical: If TRUE, automatically set x-axis margins} \item{automargin_y}{Logical: If TRUE, automatically set y-axis margins} \item{padding}{Integer: N pixels to pad plot.} \item{displayModeBar}{Logical: If TRUE, show plotly's modebar} \item{modeBar_file_format}{Character: "svg", "png", "jpeg", "pdf" / any output file type supported by plotly and your system} \item{filename}{Character: Path to file to save static plot.} \item{file_width}{Integer: File width in pixels for when \code{filename} is set.} \item{file_height}{Integer: File height in pixels for when \code{filename} is set.} \item{file_scale}{Numeric: If saving to file, scale plot by this number} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{plotly} object. } \description{ Draw interactive barplots using \code{plotly} } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} draw_bar(VADeaths, legend_xy = c(0, 1)) draw_bar(VADeaths, legend_xy = c(1, 1), legend_xanchor = "left") # simple individual bars a <- c(4, 7, 2) draw_bar(a) # if input is a data.frame, each row is a group and each column is a feature b <- data.frame(x = c(3, 5, 7), y = c(2, 1, 8), z = c(4, 5, 2)) rownames(b) <- c("Jen", "Ben", "Ren") draw_bar(b) # stacked draw_bar(b, barmode = "stack") \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_box.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_box.R \name{draw_box} \alias{draw_box} \title{Interactive Boxplots & Violin plots} \usage{ draw_box( x, time = NULL, time_bin = c("year", "quarter", "month", "day"), type = c("box", "violin"), group = NULL, x_transform = c("none", "scale", "minmax"), main = NULL, xlab = "", ylab = NULL, alpha = 0.6, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), boxpoints = "outliers", quartilemethod = "linear", xlim = NULL, ylim = NULL, violin_box = TRUE, orientation = "v", annotate_n = FALSE, annotate_n_y = 1, annotate_mean = FALSE, annotate_meansd = FALSE, annotate_meansd_y = 1, annotate_col = theme[["labs_col"]], xnames = NULL, group_lines = FALSE, group_lines_dash = "dot", group_lines_col = NULL, group_lines_alpha = 0.5, labelify = TRUE, order_by_fn = NULL, font_size = 16, ylab_standoff = 18, legend = NULL, legend_col = NULL, legend_xy = NULL, legend_orientation = "v", legend_xanchor = "auto", legend_yanchor = "auto", xaxis_type = "category", cataxis_tickangle = "auto", margin = list(b = 65, l = 65, t = 50, r = 12, pad = 0), automargin_x = TRUE, automargin_y = TRUE, boxgroupgap = NULL, hovertext = NULL, show_n = FALSE, pvals = NULL, htest = "none", htest_compare = 0, htest_y = NULL, htest_annotate = TRUE, htest_annotate_x = 0, htest_annotate_y = -0.065, htest_star_col = theme[["labs_col"]], htest_bracket_col = theme[["labs_col"]], starbracket_pad = c(0.04, 0.05, 0.09), use_plotly_group = FALSE, width = NULL, height = NULL, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, mathjax = NULL ) } \arguments{ \item{x}{Vector or List of vectors: Input} \item{time}{Date or date-time vector} \item{time_bin}{Character: "year", "quarter", "month", or "day". Period to bin by} \item{type}{Character: "box" or "violin"} \item{group}{Factor to group by} \item{x_transform}{Character: "none", "scale", or "minmax" to use raw values, scaled and centered values or min-max normalized to 0-1, respectively. Transform is applied to each variable before grouping, so that groups are comparable} \item{main}{Character: Plot title.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{alpha}{Float (0, 1]: Transparency for box colors.} \item{bg}{Color: Background color.} \item{plot_bg}{Color: Background color for plot area.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{boxpoints}{Character or FALSE: "all", "suspectedoutliers", "outliers" See \url{https://plotly.com/r/box-plots/#choosing-the-algorithm-for-computing-quartiles}} \item{quartilemethod}{Character: "linear", "exclusive", "inclusive"} \item{xlim}{Numeric vector: x-axis limits} \item{ylim}{Numeric vector: y-axis limits} \item{violin_box}{Logical: If TRUE and type is "violin" show box within violin plot} \item{orientation}{Character: "v" or "h" for vertical, horizontal} \item{annotate_n}{Logical: If TRUE, annotate with N in each box} \item{annotate_n_y}{Numeric: y position for \code{annotate_n}} \item{annotate_mean}{Logical: If TRUE, annotate with mean of each box} \item{annotate_meansd}{Logical: If TRUE, annotate with mean (SD) of each box} \item{annotate_meansd_y}{Numeric: y position for \code{annotate_meansd}} \item{annotate_col}{Color for annotations} \item{xnames}{Character, vector, length = NROW(x): x-axis names. Default = NULL, which tries to set names automatically.} \item{group_lines}{Logical: If TRUE, add separating lines between groups of boxplots} \item{group_lines_dash}{Character: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot"} \item{group_lines_col}{Color for \code{group_lines}} \item{group_lines_alpha}{Numeric: transparency for \code{group_lines_col}} \item{labelify}{Logical: If TRUE, \link{labelify} x names} \item{order_by_fn}{Function: If defined, order boxes by increasing value of this function (e.g. median).} \item{font_size}{Float: Font size for all labels.} \item{ylab_standoff}{Numeric: Standoff for y-axis label} \item{legend}{Logical: If TRUE, draw legend.} \item{legend_col}{Color: Legend text color. Default = NULL, determined by the theme.} \item{legend_xy}{Float, vector, length 2: Relative x, y position for legend.} \item{legend_orientation}{"v" or "h" for vertical, horizontal} \item{legend_xanchor}{Character: Legend's x anchor: "left", "center", "right", "auto"} \item{legend_yanchor}{Character: Legend's y anchor: "top", "middle", "bottom", "auto"} \item{xaxis_type}{Character: "linear", "log", "date", "category", "multicategory"} \item{cataxis_tickangle}{Numeric: Angle for categorical axis tick labels} \item{margin}{Named list: plot margins.} \item{automargin_x}{Logical: If TRUE, automatically set x-axis margins} \item{automargin_y}{Logical: If TRUE, automatically set y-axis margins} \item{boxgroupgap}{Numeric: Sets the gap (in plot fraction) between boxes of the same location coordinate} \item{hovertext}{Character vector: Text to show on hover for each data point} \item{show_n}{Logical: If TRUE, show N in each box} \item{pvals}{Numeric vector: Precomputed p-values. Should correspond to each box. Bypasses \code{htest} and \code{htest_compare}. Requires \code{group} to be set} \item{htest}{Character: e.g. "t.test", "wilcox.test" to compare each box to the \emph{first} box. If grouped, compare within each group to the first box. If p-value of test is less than \code{htest.thresh}, add asterisk above/ to the side of each box} \item{htest_compare}{Integer: 0: Compare all distributions against the first one; 2: Compare every second box to the one before it. Requires \code{group} to be set} \item{htest_y}{Numeric: y coordinate for \code{htest} annotation} \item{htest_annotate}{Logical: if TRUE, include htest annotation} \item{htest_annotate_x}{Numeric: x-axis paper coordinate for htest annotation} \item{htest_annotate_y}{Numeric: y-axis paper coordinate for htest annotation} \item{htest_star_col}{Color for htest annotation stars} \item{htest_bracket_col}{Color for htest annotation brackets} \item{starbracket_pad}{Numeric: Padding for htest annotation brackets} \item{use_plotly_group}{If TRUE, use plotly's \code{group} arg to group boxes.} \item{width}{Numeric: Force plot size to this width. Default = NULL, i.e. fill available space} \item{height}{Numeric: Force plot size to this height. Default = NULL, i.e. fill available space} \item{displayModeBar}{Logical: If TRUE, show plotly's modebar} \item{modeBar_file_format}{Character: "svg", "png", "jpeg", "pdf"} \item{filename}{Character: Path to file to save static plot.} \item{file_width}{Integer: File width in pixels for when \code{filename} is set.} \item{file_height}{Integer: File height in pixels for when \code{filename} is set.} \item{file_scale}{Numeric: If saving to file, scale plot by this number} \item{mathjax}{Optional Character \{"local", "cdn"\}: Whether to use local or CDN version of MathJax for rendering mathematical annotations.} } \value{ \code{plotly} object. } \description{ Draw interactive boxplots or violin plots using \pkg{plotly} } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. For multiple box plots, the recommendation is: \itemize{ \item \code{x=dat[, columnindex]} for multiple variables of a data.frame \item \code{x=list(a=..., b=..., etc.)} for multiple variables of potentially different length \item \code{x=split(var, group)} for one variable with multiple groups: group names appear below boxplots \item \verb{x=dat[, columnindex], group = factor} for grouping multiple variables: group names appear in legend } If \code{orientation == "h"}, \code{xlab} is applied to y-axis and vice versa. Similarly, \code{x.axist.type} applies to y-axis - this defaults to "category" and would not normally need changing. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # A.1 Box plot of 4 variables draw_box(iris[, 1:4]) # A.2 Grouped Box plot draw_box(iris[, 1:4], group = iris[["Species"]]) draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE) # B. Boxplot binned by time periods # Synthetic data with an instantenous shift in distributions set.seed(2021) dat1 <- data.frame(alpha = rnorm(200, 0), beta = rnorm(200, 2), gamma = rnorm(200, 3)) dat2 <- data.frame(alpha = rnorm(200, 5), beta = rnorm(200, 8), gamma = rnorm(200, -3)) x <- rbind(dat1, dat2) startDate <- as.Date("2019-12-04") endDate <- as.Date("2021-03-31") time <- seq(startDate, endDate, length.out = 400) draw_box(x[, 1], time, "year", ylab = "alpha") draw_box(x, time, "year", legend.xy = c(0, 1)) draw_box(x, time, "quarter", legend.xy = c(0, 1)) draw_box(x, time, "month", legend.orientation = "h", legend.xy = c(0, 1), legend.yanchor = "bottom" ) # (Note how the boxplots widen when the period includes data from both dat1 and dat2) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_calibration.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_calibration.R \name{draw_calibration} \alias{draw_calibration} \title{Draw calibration plot} \usage{ draw_calibration( true_labels, predicted_prob, n_bins = 10L, bin_method = c("quantile", "equidistant"), binclasspos = 2L, main = NULL, subtitle = NULL, xlab = "Mean predicted probability", ylab = "Empirical risk", show_marginal_x = TRUE, marginal_x_y = -0.02, marginal_col = NULL, marginal_size = 10, mode = "markers+lines", show_brier = TRUE, theme = choose_theme(getOption("rtemis_theme")), filename = NULL, ... ) } \arguments{ \item{true_labels}{Factor or list of factors with true class labels} \item{predicted_prob}{Numeric vector or list of numeric vectors with predicted probabilities} \item{n_bins}{Integer: Number of windows to split the data into} \item{bin_method}{Character: "quantile" or "equidistant": Method to bin the estimated probabilities.} \item{binclasspos}{Integer: Index of the positive class. The convention used in the package is the second level is the positive class.} \item{main}{Character: Main title} \item{subtitle}{Character: Subtitle, placed bottom right of plot} \item{xlab}{Character: x-axis label} \item{ylab}{Character: y-axis label} \item{show_marginal_x}{Logical: Add marginal plot of distribution of estimated probabilities} \item{marginal_x_y}{Numeric: y position of marginal plot} \item{marginal_col}{Character: Color of marginal plot} \item{marginal_size}{Numeric: Size of marginal plot} \item{mode}{Character: "lines", "markers", "lines+markers": How to plot.} \item{show_brier}{Logical: If TRUE, add Brier scores to trace names.} \item{theme}{\code{Theme} object.} \item{filename}{Character: Path to save output.} \item{...}{Additional arguments passed to \link{draw_scatter}} } \value{ \code{plotly} object. } \description{ Draw calibration plot } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Synthetic data with n cases n <- 500L true_labels <- factor(sample(c("A", "B"), n, replace = TRUE)) # Synthetic probabilities where A has mean 0.25 and B has mean 0.75 predicted_prob <- ifelse(true_labels == "A", rbeta(n, 2, 6), rbeta(n, 6, 2) ) draw_calibration(true_labels, predicted_prob) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_confusion.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_confusion.R \name{draw_confusion} \alias{draw_confusion} \title{Plot confusion matrix} \usage{ draw_confusion( x, xlab = "Predicted", ylab = "Reference", true_col = "#43A4AC", false_col = "#FA9860", font_size = 18, main = NULL, main_y = 1, main_yanchor = "bottom", theme = choose_theme(getOption("rtemis_theme")), margin = list(l = 20, r = 5, b = 5, t = 20), filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{\code{ClassificationMetrics} object produced by \link{classification_metrics} or confusion matrix where rows are the reference and columns are the estimated classes. For binary classification, the first row and column are the positive class.} \item{xlab}{Character: x-axis label. Default is "Predicted".} \item{ylab}{Character: y-axis label. Default is "Reference".} \item{true_col}{Color for true positives & true negatives.} \item{false_col}{Color for false positives & false negatives.} \item{font_size}{Integer: font size.} \item{main}{Character: plot title.} \item{main_y}{Numeric: y position of the title.} \item{main_yanchor}{Character: y anchor of the title.} \item{theme}{\code{Theme} object.} \item{margin}{List: Plot margins.} \item{filename}{Character: file name to save the plot. Default is NULL.} \item{file_width}{Numeric: width of the file. Default is 500.} \item{file_height}{Numeric: height of the file. Default is 500.} \item{file_scale}{Numeric: scale of the file. Default is 1.} } \value{ \code{plotly} object. } \description{ Plot confusion matrix } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Assume positive class is "b" true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a")) predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) metrics <- classification_metrics(true_labels, predicted_labels, predicted_prob) draw_confusion(metrics) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_dist.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_dist.R \name{draw_dist} \alias{draw_dist} \title{Draw Distributions using Histograms and Density Plots} \usage{ draw_dist( x, type = c("density", "histogram"), mode = c("overlap", "ridge"), group = NULL, main = NULL, xlab = NULL, ylab = NULL, col = NULL, alpha = 0.75, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), axes_square = FALSE, group_names = NULL, font_size = 16, font_alpha = 0.8, legend = NULL, legend_xy = c(0, 1), legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", bargap = 0.05, vline = NULL, vline_col = theme[["fg"]], vline_width = 1, vline_dash = "dot", text = NULL, text_x = 1, text_xref = "paper", text_xanchor = "left", text_y = 1, text_yref = "paper", text_yanchor = "top", text_col = theme[["fg"]], margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), automargin_x = TRUE, automargin_y = TRUE, zerolines = FALSE, density_kernel = "gaussian", density_bw = "SJ", histnorm = c("", "density", "percent", "probability", "probability density"), histfunc = c("count", "sum", "avg", "min", "max"), hist_n_bins = 20, barmode = "overlay", ridge_sharex = TRUE, ridge_y_labs = FALSE, ridge_order_on_mean = TRUE, displayModeBar = TRUE, modeBar_file_format = "svg", width = NULL, height = NULL, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{Numeric vector / data.frame / list: Input. If not a vector, each column / each element is drawn.} \item{type}{Character: "density" or "histogram".} \item{mode}{Character: "overlap", "ridge". How to plot different groups; on the same axes ("overlap"), or on separate plots with the same x-axis ("ridge").} \item{group}{Vector: Will be converted to factor; levels define group members.} \item{main}{Character: Main title for the plot.} \item{xlab}{Character: Label for the x-axis.} \item{ylab}{Character: Label for the y-axis.} \item{col}{Color: Colors for the plot.} \item{alpha}{Numeric: Alpha transparency for plot elements.} \item{plot_bg}{Color: Background color for plot area.} \item{theme}{\code{Theme} object.} \item{palette}{Character: Color palette to use.} \item{axes_square}{Logical: If TRUE, draw a square plot to fill the graphic device. Default = FALSE.} \item{group_names}{Character: Names for the groups.} \item{font_size}{Numeric: Font size for plot text.} \item{font_alpha}{Numeric: Alpha transparency for font.} \item{legend}{Logical: If TRUE, draw legend. Default = NULL, which will be set to TRUE if x is a list of more than 1 element.} \item{legend_xy}{Numeric, vector, length 2: Relative x, y position for legend. Default = c(0, 1).} \item{legend_col}{Color: Color for the legend text.} \item{legend_bg}{Color: Background color for legend.} \item{legend_border_col}{Color: Border color for legend.} \item{bargap}{Numeric: The gap between adjacent histogram bars in plot fraction.} \item{vline}{Numeric, vector: If defined, draw a vertical line at this x value(s).} \item{vline_col}{Color: Color for \code{vline}.} \item{vline_width}{Numeric: Width for \code{vline}.} \item{vline_dash}{Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot".} \item{text}{Character: If defined, add this text over the plot.} \item{text_x}{Numeric: x-coordinate for \code{text}.} \item{text_xref}{Character: "x": \code{text_x} refers to plot's x-axis; "paper": \code{text_x} refers to plotting area from 0-1.} \item{text_xanchor}{Character: "auto", "left", "center", "right".} \item{text_y}{Numeric: y-coordinate for \code{text}.} \item{text_yref}{Character: "y": \code{text_y} refers to plot's y-axis; "paper": \code{text_y} refers to plotting area from 0-1.} \item{text_yanchor}{Character: "auto", "top", "middle", "bottom".} \item{text_col}{Color: Color for \code{text}.} \item{margin}{List: Margins for the plot.} \item{automargin_x}{Logical: If TRUE, automatically adjust x-axis margins.} \item{automargin_y}{Logical: If TRUE, automatically adjust y-axis margins.} \item{zerolines}{Logical: If TRUE, draw lines at y = 0.} \item{density_kernel}{Character: Kernel to use for density estimation.} \item{density_bw}{Character: Bandwidth to use for density estimation.} \item{histnorm}{Character: NULL, "percent", "probability", "density", "probability density".} \item{histfunc}{Character: "count", "sum", "avg", "min", "max".} \item{hist_n_bins}{Integer: Number of bins to use if type = "histogram".} \item{barmode}{Character: Barmode for histogram. One of "overlay", "stack", "relative", "group".} \item{ridge_sharex}{Logical: If TRUE, draw single x-axis when \code{mode = "ridge"}.} \item{ridge_y_labs}{Logical: If TRUE, show individual y labels when \code{mode = "ridge"}.} \item{ridge_order_on_mean}{Logical: If TRUE, order groups by mean value when \code{mode = "ridge"}.} \item{displayModeBar}{Logical: If TRUE, display the mode bar.} \item{modeBar_file_format}{Character: File format for mode bar. Default = "svg".} \item{width}{Numeric: Force plot size to this width. Default = NULL, i.e. fill available space.} \item{height}{Numeric: Force plot size to this height. Default = NULL, i.e. fill available space.} \item{filename}{Character: Path to file to save static plot.} \item{file_width}{Integer: File width in pixels for when \code{filename} is set.} \item{file_height}{Integer: File height in pixels for when \code{filename} is set.} \item{file_scale}{Numeric: If saving to file, scale plot by this number.} } \value{ \code{plotly} object. } \description{ Draw Distributions using Histograms and Density Plots using \code{plotly}. } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. If input is data.frame, non-numeric variables will be removed. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Will automatically use only numeric columns draw_dist(iris) draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]]) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_fit.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_scatter.R \name{draw_fit} \alias{draw_fit} \title{True vs. Predicted Plot} \usage{ draw_fit( x, y, xlab = "True", ylab = "Predicted", fit = "glm", se_fit = TRUE, axes_square = TRUE, axes_equal = TRUE, diagonal = TRUE, ... ) } \arguments{ \item{x}{Numeric, vector/data.frame/list: True values. If y is NULL and \code{NCOL(x) > 1}, first two columns used as \code{x} and \code{y}, respectively} \item{y}{Numeric, vector/data.frame/list: Predicted values} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{fit}{Character: Fit method.} \item{se_fit}{Logical: If TRUE, include standard error of the fit.} \item{axes_square}{Logical: If TRUE, draw a square plot.} \item{axes_equal}{Logical: If TRUE, set equal scaling for axes.} \item{diagonal}{Logical: If TRUE, add diagonal line.} \item{...}{Additional arguments passed to \link{draw_scatter}} } \value{ \code{plotly} object. } \description{ A \code{draw_scatter} wrapper for plotting true vs. predicted values } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} x <- rnorm(500) y <- x + rnorm(500) draw_fit(x, y) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_graphD3.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_graphd3.R \name{draw_graphD3} \alias{draw_graphD3} \title{Plot graph using \pkg{networkD3}} \usage{ draw_graphD3( net, groups = NULL, color_scale = NULL, edge_col = NULL, node_col = NULL, node_alpha = 0.5, edge_alpha = 0.33, zoom = TRUE, legend = FALSE, palette = get_palette(getOption("rtemis_palette")), theme = choose_theme(getOption("rtemis_theme")), ... ) } \arguments{ \item{net}{\pkg{igraph} network.} \item{groups}{Vector, length n nodes indicating group/cluster/community membership of nodes in \code{net}.} \item{color_scale}{D3 colorscale (e.g. \code{networkD3::JS("d3.scaleOrdinal(d3.schemeCategory20b);")}).} \item{edge_col}{Color for edges.} \item{node_col}{Color for nodes.} \item{node_alpha}{Float [0, 1]: Node opacity.} \item{edge_alpha}{Float [0, 1]: Edge opacity.} \item{zoom}{Logical: If TRUE, graph is zoomable.} \item{legend}{Logical: If TRUE, display legend for groups.} \item{palette}{Character vector: Colors to use.} \item{theme}{\code{Theme} object.} \item{...}{Additional arguments to pass to \code{networkD3}.} } \value{ \code{forceNetwork} object. } \description{ Plot graph using \pkg{networkD3} } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} library(igraph) g <- make_ring(10) draw_graphD3(g) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_graphjs.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_graphjs.R \name{draw_graphjs} \alias{draw_graphjs} \title{Plot network using \pkg{threejs::graphjs}} \usage{ draw_graphjs( net, vertex_size = 1, vertex_col = NULL, vertex_label_col = NULL, vertex_label_alpha = 0.66, vertex_frame_col = NA, vertex_label = NULL, vertex_shape = "circle", edge_col = NULL, edge_alpha = 0.5, edge_curved = 0.35, edge_width = 2, layout = c("fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama"), coords = NULL, layout_args = list(), cluster = NULL, groups = NULL, cluster_config = list(), cluster_mark_groups = TRUE, cluster_color_vertices = FALSE, main = "", theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), mar = rep(0, 4), filename = NULL, verbosity = 1L, ... ) } \arguments{ \item{net}{\pkg{igraph} network.} \item{vertex_size}{Numeric: Vertex size.} \item{vertex_col}{Color for vertices.} \item{vertex_label_col}{Color for vertex labels.} \item{vertex_label_alpha}{Numeric: Transparency for \code{vertex_label_col}.} \item{vertex_frame_col}{Color for vertex border (frame).} \item{vertex_label}{Character vector: Vertex labels. Default = NULL, which will keep existing names in \code{net} if any. Set to NA to avoid printing vertex labels.} \item{vertex_shape}{Character, vector, length 1 or N nodes: Vertex shape. See \code{graphjs("vertex_shape")}.} \item{edge_col}{Color for edges.} \item{edge_alpha}{Numeric: Transparency for edges.} \item{edge_curved}{Numeric: Curvature of edges.} \item{edge_width}{Numeric: Edge thickness.} \item{layout}{Character: one of: "fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama", corresponding to all the available layouts in \pkg{igraph}.} \item{coords}{Output of precomputed \pkg{igraph} layout. If provided, \code{layout} is ignored.} \item{layout_args}{List of arguments to pass to \code{layout} function.} \item{cluster}{Character: one of: "edge_betweenness", "fast_greedy", "infomap", "label_prop", "leading_eigen", "louvain", "optimal", "spinglass", "walktrap", corresponding to all the available \pkg{igraph} clustering functions.} \item{groups}{Output of precomputed \pkg{igraph} clustering. If provided, \code{cluster} is ignored.} \item{cluster_config}{List of arguments to pass to \code{cluster} function.} \item{cluster_mark_groups}{Logical: If TRUE, draw polygons to indicate clusters, if \code{groups} or \code{cluster} are defined.} \item{cluster_color_vertices}{Logical: If TRUE, color vertices by cluster membership.} \item{main}{Character: Main title.} \item{theme}{\code{Theme} object.} \item{palette}{Color vector or name of rtemis palette.} \item{mar}{Numeric vector, length 4: \code{par}'s margin argument.} \item{filename}{Character: If provided, save plot to this filepath.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Extra arguments to pass to \code{igraph::plot.igraph()}.} } \value{ \code{scatterplotThree} object. } \description{ Interactive plotting of an \pkg{igraph} net using \pkg{threejs}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} library(igraph) g <- make_ring(10) draw_graphjs(g) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_heatmap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_heatmap.R \name{draw_heatmap} \alias{draw_heatmap} \title{Interactive Heatmaps} \usage{ draw_heatmap( x, Rowv = TRUE, Colv = TRUE, cluster = FALSE, symm = FALSE, cellnote = NULL, colorgrad_n = 101, colors = NULL, space = "rgb", lo = "#18A3AC", lomid = NULL, mid = NULL, midhi = NULL, hi = "#F48024", k_row = 1, k_col = 1, grid_gap = 0, limits = NULL, margins = NULL, main = NULL, xlab = NULL, ylab = NULL, key_title = NULL, showticklabels = NULL, colorbar_len = 0.7, plot_method = "plotly", theme = choose_theme(getOption("rtemis_theme")), row_side_colors = NULL, row_side_palette = NULL, col_side_colors = NULL, col_side_palette = NULL, font_size = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) } \arguments{ \item{x}{Input matrix.} \item{Rowv}{Logical or dendrogram. If Logical: Compute dendrogram and reorder rows. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at \code{heatmaply::heatmaply("Rowv")}.} \item{Colv}{Logical or dendrogram. If Logical: Compute dendrogram and reorder columns. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at \code{heatmaply::heatmaply("Colv")}.} \item{cluster}{Logical: If TRUE, set \code{Rowv} and \code{Colv} to TRUE.} \item{symm}{Logical: If TRUE, treat \code{x} symmetrically - \code{x} must be a square matrix.} \item{cellnote}{Matrix with values to be displayed on hover. Defaults to \code{ddSci(x)}.} \item{colorgrad_n}{Integer: Number of colors in gradient. Default = 101.} \item{colors}{Character vector: Colors to use in gradient.} \item{space}{Character: Color space to use. Default = "rgb".} \item{lo}{Character: Color for low values. Default = "#18A3AC".} \item{lomid}{Character: Color for low-mid values.} \item{mid}{Character: Color for mid values.} \item{midhi}{Character: Color for mid-high values.} \item{hi}{Character: Color for high values. Default = "#F48024".} \item{k_row}{Integer: Number of desired number of groups by which to color dendrogram branches in the rows. Default = 1.} \item{k_col}{Integer: Number of desired number of groups by which to color dendrogram branches in the columns. Default = 1.} \item{grid_gap}{Integer: Space between cells. Default = 0 (no space).} \item{limits}{Float, length 2: Determine color range. Default = NULL, which automatically centers values around 0.} \item{margins}{Float, length 4: Heatmap margins.} \item{main}{Character: Main title.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{key_title}{Character: Title for the color key.} \item{showticklabels}{Logical: If TRUE, show tick labels.} \item{colorbar_len}{Numeric: Length of the colorbar.} \item{plot_method}{Character: Plot method to use. Default = "plotly".} \item{theme}{\code{Theme} object.} \item{row_side_colors}{Data frame: Column names will be label names, cells should be label colors. See \code{heatmaply::heatmaply("row_side_colors")}.} \item{row_side_palette}{Color palette function. See \code{heatmaply::heatmaply("row_side_palette")}.} \item{col_side_colors}{Data frame: Column names will be label names, cells should be label colors. See \code{heatmaply::heatmaply("col_side_colors")}.} \item{col_side_palette}{Color palette function. See \code{heatmaply::heatmaply("col_side_palette")}.} \item{font_size}{Numeric: Font size.} \item{padding}{Numeric: Padding between cells.} \item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.} \item{modeBar_file_format}{Character: File format for image exports from the mode bar.} \item{filename}{Character: File name to save the plot.} \item{file_width}{Numeric: Width of exported image.} \item{file_height}{Numeric: Height of exported image.} \item{file_scale}{Numeric: Scale of exported image.} \item{...}{Additional arguments to be passed to \code{heatmaply::heatmaply}.} } \value{ \code{plotly} object.` } \description{ Draw interactive heatmaps using \code{heatmaply}. } \details{ See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. 'heatmaply' unfortunately forces loading of the 'colorspace' namespace. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} x <- rnormmat(200, 20) xcor <- cor(x) draw_heatmap(xcor) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_leaflet.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_leaflet.R \name{draw_leaflet} \alias{draw_leaflet} \title{Plot interactive choropleth map using \pkg{leaflet}} \usage{ draw_leaflet( fips, values, names = NULL, fillOpacity = 1, color_mapping = c("Numeric", "Bin"), col_lo = "#0290EE", col_hi = "#FE4AA3", col_na = "#303030", col_highlight = "#FE8A4F", col_interpolate = c("linear", "spline"), col_bins = 21, domain = NULL, weight = 0.5, color = "black", alpha = 1, bg_tile_provider = leaflet::providers[["CartoDB.Positron"]], bg_tile_alpha = 0.67, fg_tile_provider = leaflet::providers[["CartoDB.PositronOnlyLabels"]], legend_position = c("topright", "bottomright", "bottomleft", "topleft"), legend_alpha = 0.8, legend_title = NULL, init_lng = -98.5418083333333, init_lat = 39.2074138888889, init_zoom = 3, stroke = TRUE ) } \arguments{ \item{fips}{Character vector: FIPS codes. (If numeric, it will be appropriately zero-padded).} \item{values}{Values to map to \code{fips}.} \item{names}{Character vector: Optional county names to appear on hover along \code{values}.} \item{fillOpacity}{Float: Opacity for fill colors.} \item{color_mapping}{Character: "Numeric" or "Bin".} \item{col_lo}{Overlay color mapped to lowest value.} \item{col_hi}{Overlay color mapped to highest value.} \item{col_na}{Color mapped to NA values.} \item{col_highlight}{Hover border color.} \item{col_interpolate}{Character: "linear" or "spline".} \item{col_bins}{Integer: Number of color bins to create if \code{color_mapping = "Bin"}.} \item{domain}{Limits for mapping colors to values. Default = NULL and set to range.} \item{weight}{Float: Weight of county border lines.} \item{color}{Color of county border lines.} \item{alpha}{Float: Overlay transparency.} \item{bg_tile_provider}{Background tile (below overlay colors), one of \code{leaflet::providers}.} \item{bg_tile_alpha}{Float: Background tile transparency.} \item{fg_tile_provider}{Foreground tile (above overlay colors), one of \code{leaflet::providers}.} \item{legend_position}{Character: One of: "topright", "bottomright", "bottomleft", "topleft".} \item{legend_alpha}{Float: Legend box transparency.} \item{legend_title}{Character: Defaults to name of \code{values} variable.} \item{init_lng}{Float: Center map around this longitude (in decimal form). Default = -98.54180833333334 (US geographic center).} \item{init_lat}{Float: Center map around this latitude (in decimal form). Default = 39.207413888888894 (US geographic center).} \item{init_zoom}{Integer: Initial zoom level (depends on device, i.e. window, size).} \item{stroke}{Logical: If TRUE, draw polygon borders.} } \value{ \code{leaflet} object. } \description{ Plot interactive choropleth map using \pkg{leaflet} } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} fips <- c(06075, 42101) population <- c(874961, 1579000) names <- c("SF", "Philly") draw_leaflet(fips, population, names) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_pie.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_pie.R \name{draw_pie} \alias{draw_pie} \title{Interactive Pie Chart} \usage{ draw_pie( x, main = NULL, xlab = NULL, ylab = NULL, alpha = 0.8, bg = NULL, plot_bg = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), category_names = NULL, textinfo = "label+percent", font_size = 16, labs_col = NULL, legend = TRUE, legend_col = NULL, sep_col = NULL, margin = list(b = 50, l = 50, t = 50, r = 20), padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{data.frame: Input: Either a) 1 numeric column with categories defined by rownames, or b) two columns, the first is category names, the second numeric or c) a numeric vector with categories defined using the \code{category.names} argument.} \item{main}{Character: Plot title. Default = NULL, which results in \code{colnames(x)[1]}.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{alpha}{Numeric: Alpha for the pie slices.} \item{bg}{Character: Background color.} \item{plot_bg}{Character: Plot background color.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{category_names}{Character, vector, length = NROW(x): Category names. Default = NULL, which uses either \code{rownames(x)}, or the first column of \code{x} if \code{ncol(x) = 2}.} \item{textinfo}{Character: Info to show over each slice: "label", "percent", "label+percent".} \item{font_size}{Integer: Font size for labels.} \item{labs_col}{Character: Color of labels.} \item{legend}{Logical: If TRUE, show legend.} \item{legend_col}{Character: Color for legend.} \item{sep_col}{Character: Separator color.} \item{margin}{List: Margin settings.} \item{padding}{Numeric: Padding between cells.} \item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.} \item{modeBar_file_format}{Character: File format for image exports from the mode bar.} \item{filename}{Character: File name to save plot.} \item{file_width}{Integer: Width for saved file.} \item{file_height}{Integer: Height for saved file.} \item{file_scale}{Numeric: Scale for saved file.} } \value{ \code{plotly} object. } \description{ Draw interactive pie charts using \code{plotly}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} draw_pie(VADeaths[, 1, drop = FALSE]) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_protein.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_protein.R \name{draw_protein} \alias{draw_protein} \title{Plot an amino acid sequence with annotations} \usage{ draw_protein( x, site = NULL, region = NULL, ptm = NULL, cleavage_site = NULL, variant = NULL, disease_variants = NULL, n_per_row = NULL, main = NULL, main_xy = c(0.055, 0.975), main_xref = "paper", main_yref = "paper", main_xanchor = "middle", main_yanchor = "top", layout = c("simple", "grid", "1curve", "2curve"), show_markers = TRUE, show_labels = TRUE, font_size = 18, label_col = NULL, scatter_mode = "markers+lines", marker_size = 28, marker_col = NULL, marker_alpha = 1, marker_symbol = "circle", line_col = NULL, line_alpha = 1, line_width = 2, show_full_names = TRUE, region_scatter_mode = "markers+lines", region_style = 3, region_marker_size = marker_size, region_marker_alpha = 0.6, region_marker_symbol = "circle", region_line_dash = "solid", region_line_shape = "line", region_line_smoothing = 1, region_line_width = 1, region_line_alpha = 0.6, theme = choose_theme(getOption("rtemis_theme")), region_palette = getOption("rtemis_palette", "rtms"), region_outline_only = FALSE, region_outline_pad = 2, region_pad = 0.35, region_fill_alpha = 0.1666666, region_fill_shape = "line", region_fill_smoothing = 1, bpadcx = 0.5, bpadcy = 0.5, site_marker_size = marker_size, site_marker_symbol = marker_symbol, site_marker_alpha = 1, site_border_width = 1.5, site_palette = getOption("rtemis_palette", "rtms"), variant_col = "#FA6E1E", disease_variant_col = "#E266AE", showlegend_ptm = TRUE, ptm_col = NULL, ptm_symbol = "circle", ptm_offset = 0.12, ptm_pad = 0.35, ptm_marker_size = marker_size/4.5, clv_col = NULL, clv_symbol = "triangle-down", clv_offset = 0.12, clv_pad = 0.35, clv_marker_size = marker_size/4, annotate_position_every = 10, annotate_position_alpha = 0.5, annotate_position_ay = -0.4 * marker_size, position_font_size = font_size - 6, legend_xy = c(0.97, 0.954), legend_xanchor = "left", legend_yanchor = "top", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, margin = list(b = 0, l = 0, t = 0, r = 0, pad = 0), showgrid_x = FALSE, showgrid_y = FALSE, automargin_x = TRUE, automargin_y = TRUE, xaxis_autorange = TRUE, yaxis_autorange = "reversed", scaleanchor_y = "x", scaleratio_y = 1, hoverlabel_align = "left", displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, filename = NULL, file_width = 1320, file_height = 990, file_scale = 1, width = NULL, height = NULL, verbosity = 1L ) } \arguments{ \item{x}{Character vector: amino acid sequence (1-letter abbreviations) OR \code{a3} object OR Character: path to JSON file OR Character: UniProt accession number.} \item{site}{Named list of lists with indices of sites. These will be highlighted by coloring the border of markers.} \item{region}{Named list of lists with indices of regions. These will be highlighted by coloring the markers and lines of regions using the \code{palette} colors.} \item{ptm}{List of post-translational modifications.} \item{cleavage_site}{List of cleavage sites.} \item{variant}{List of variant information.} \item{disease_variants}{List of disease variant information.} \item{n_per_row}{Integer: Number of amino acids to show per row.} \item{main}{Character: Main title.} \item{main_xy}{Numeric vector, length 2: x and y coordinates for title. e.g. if \code{main_xref} and \code{main_yref} are \code{"paper"}: \code{c(0.055, .975)} is top left, \code{c(.5, .975)} is top and middle.} \item{main_xref}{Character: xref for title.} \item{main_yref}{Character: yref for title.} \item{main_xanchor}{Character: xanchor for title.} \item{main_yanchor}{Character: yanchor for title.} \item{layout}{Character: "1curve", "grid": type of layout to use.} \item{show_markers}{Logical: If TRUE, show amino acid markers.} \item{show_labels}{Logical: If TRUE, annotate amino acids with elements.} \item{font_size}{Integer: Font size for labels.} \item{label_col}{Color for labels.} \item{scatter_mode}{Character: Mode for scatter plot.} \item{marker_size}{Integer: Size of markers.} \item{marker_col}{Color for markers.} \item{marker_alpha}{Numeric: Alpha for markers.} \item{marker_symbol}{Character: Symbol for markers.} \item{line_col}{Color for lines.} \item{line_alpha}{Numeric: Alpha for lines.} \item{line_width}{Numeric: Width for lines.} \item{show_full_names}{Logical: If TRUE, show full names of amino acids.} \item{region_scatter_mode}{Character: Mode for scatter plot.} \item{region_style}{Integer: Style for regions.} \item{region_marker_size}{Integer: Size of region markers.} \item{region_marker_alpha}{Numeric: Alpha for region markers.} \item{region_marker_symbol}{Character: Symbol for region markers.} \item{region_line_dash}{Character: Dash for region lines.} \item{region_line_shape}{Character: Shape for region lines.} \item{region_line_smoothing}{Numeric: Smoothing for region lines.} \item{region_line_width}{Numeric: Width for region lines.} \item{region_line_alpha}{Numeric: Alpha for region lines.} \item{theme}{\code{Theme} object.} \item{region_palette}{Named list of colors for regions.} \item{region_outline_only}{Logical: If TRUE, only show outline of regions.} \item{region_outline_pad}{Numeric: Padding for region outline.} \item{region_pad}{Numeric: Padding for region.} \item{region_fill_alpha}{Numeric: Alpha for region fill.} \item{region_fill_shape}{Character: Shape for region fill.} \item{region_fill_smoothing}{Numeric: Smoothing for region fill.} \item{bpadcx}{Numeric: Padding for region border.} \item{bpadcy}{Numeric: Padding for region border.} \item{site_marker_size}{Integer: Size of site markers.} \item{site_marker_symbol}{Character: Symbol for site markers.} \item{site_marker_alpha}{Numeric: Alpha for site markers.} \item{site_border_width}{Numeric: Width for site borders.} \item{site_palette}{Named list of colors for sites.} \item{variant_col}{Color for variants.} \item{disease_variant_col}{Color for disease variants.} \item{showlegend_ptm}{Logical: If TRUE, show legend for PTMs.} \item{ptm_col}{Named list of colors for PTMs.} \item{ptm_symbol}{Character: Symbol for PTMs.} \item{ptm_offset}{Numeric: Offset for PTMs.} \item{ptm_pad}{Numeric: Padding for PTMs.} \item{ptm_marker_size}{Integer: Size of PTM markers.} \item{clv_col}{Color for cleavage site annotations.} \item{clv_symbol}{Character: Symbol for cleavage site annotations.} \item{clv_offset}{Numeric: Offset for cleavage site annotations.} \item{clv_pad}{Numeric: Padding for cleavage site annotations.} \item{clv_marker_size}{Integer: Size of cleavage site annotation markers.} \item{annotate_position_every}{Integer: Annotate every nth position.} \item{annotate_position_alpha}{Numeric: Alpha for position annotations.} \item{annotate_position_ay}{Numeric: Y offset for position annotations.} \item{position_font_size}{Integer: Font size for position annotations.} \item{legend_xy}{Numeric vector, length 2: x and y coordinates for legend.} \item{legend_xanchor}{Character: xanchor for legend.} \item{legend_yanchor}{Character: yanchor for legend.} \item{legend_orientation}{Character: Orientation for legend.} \item{legend_col}{Color for legend.} \item{legend_bg}{Color for legend background.} \item{legend_border_col}{Color for legend border.} \item{legend_borderwidth}{Numeric: Width for legend border.} \item{legend_group_gap}{Numeric: Gap between legend groups.} \item{margin}{List: Margin settings.} \item{showgrid_x}{Logical: If TRUE, show x grid.} \item{showgrid_y}{Logical: If TRUE, show y grid.} \item{automargin_x}{Logical: If TRUE, use automatic margin for x axis.} \item{automargin_y}{Logical: If TRUE, use automatic margin for y axis.} \item{xaxis_autorange}{Logical: If TRUE, use automatic range for x axis.} \item{yaxis_autorange}{Character: If TRUE, use automatic range for y axis.} \item{scaleanchor_y}{Character: Scale anchor for y axis.} \item{scaleratio_y}{Numeric: Scale ratio for y axis.} \item{hoverlabel_align}{Character: Alignment for hover label.} \item{displayModeBar}{Logical: If TRUE, display mode bar.} \item{modeBar_file_format}{Character: File format for mode bar.} \item{scrollZoom}{Logical: If TRUE, enable scroll zoom.} \item{filename}{Character: File name to save plot.} \item{file_width}{Integer: Width for saved file.} \item{file_height}{Integer: Height for saved file.} \item{file_scale}{Numeric: Scale for saved file.} \item{width}{Integer: Width for plot.} \item{height}{Integer: Height for plot.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{plotly} object. } \description{ Plot an amino acid sequence with multiple site and/or region annotations. } \examples{ \dontrun{ # Reads sequence from UniProt server tau <- seqinr::read.fasta("https://rest.uniprot.org/uniprotkb/P10636.fasta", seqtype = "AA" ) draw_protein(as.character(tau[[1]])) # or directly using the UniProt accession number: draw_protein("P10636") } } \author{ EDG } ================================================ FILE: man/draw_pvals.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_pvals.R \name{draw_pvals} \alias{draw_pvals} \title{Barplot p-values using \link{draw_bar}} \usage{ draw_pvals( x, xnames = NULL, yname = NULL, p_adjust_method = "none", pval_hline = 0.05, hline_col = rt_red, hline_dash = "dash", ... ) } \arguments{ \item{x}{Float, vector: p-values.} \item{xnames}{Character, vector: feature names.} \item{yname}{Character: outcome name.} \item{p_adjust_method}{Character: method for \link{p.adjust}.} \item{pval_hline}{Float: Significance level at which to plot horizontal line.} \item{hline_col}{Color for \code{pval_hline}.} \item{hline_dash}{Character: type of line to draw.} \item{...}{Additional arguments passed to \link{draw_bar}.} } \value{ \code{plotly} object. } \description{ Plot 1 - p-values as a barplot } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} draw_pvals(c(0.01, 0.02, 0.03), xnames = c("Feature1", "Feature2", "Feature3")) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_roc.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_roc.R \name{draw_roc} \alias{draw_roc} \title{Draw ROC curve} \usage{ draw_roc( true_labels, predicted_prob, multiclass_fill_labels = TRUE, main = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), legend = TRUE, legend_title = "Group (AUC)", legend_xy = c(1, 0), legend_xanchor = "right", legend_yanchor = "bottom", auc_dp = 3L, xlim = c(-0.05, 1.05), ylim = c(-0.05, 1.05), diagonal = TRUE, diagonal_col = NULL, axes_square = TRUE, filename = NULL, ... ) } \arguments{ \item{true_labels}{Factor: True outcome labels.} \item{predicted_prob}{Numeric vector [0, 1]: Predicted probabilities for the positive class (i.e. second level of outcome). Or, for multiclass, a matrix of predicted probabilities with one column per class. Or, a list of such vectors/matrices to draw multiple ROC curves on the same plot.} \item{multiclass_fill_labels}{Logical: If TRUE, fill in labels for multiclass ROC curves. If FALSE, column names of \code{predicted_prob} must match levels of \code{true_labels}.} \item{main}{Character: Main title for the plot.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{legend}{Logical: If TRUE, draw legend.} \item{legend_title}{Character: Title for the legend.} \item{legend_xy}{Numeric vector: Position of the legend in the form c(x, y).} \item{legend_xanchor}{Character: X anchor for the legend.} \item{legend_yanchor}{Character: Y anchor for the legend.} \item{auc_dp}{Integer: Number of decimal places for AUC values.} \item{xlim}{Numeric vector: Limits for the x-axis.} \item{ylim}{Numeric vector: Limits for the y-axis.} \item{diagonal}{Logical: If TRUE, draw diagonal line.} \item{diagonal_col}{Character: Color for the diagonal line.} \item{axes_square}{Logical: If TRUE, make axes square.} \item{filename}{Character: If provided, save the plot to this file.} \item{...}{Additional arguments passed to \link{draw_scatter}.} } \value{ \code{plotly} object. } \description{ Draw ROC curve } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Binary classification true_labels <- factor(c("A", "B", "A", "A", "B", "A", "B", "B", "A", "B")) predicted_prob <- c(0.1, 0.4, 0.35, 0.8, 0.65, 0.2, 0.9, 0.55, 0.3, 0.7) draw_roc(true_labels, predicted_prob) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_scatter.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_scatter.R \name{draw_scatter} \alias{draw_scatter} \title{Interactive Scatter Plots} \usage{ draw_scatter( x, y = NULL, fit = NULL, se_fit = FALSE, se_times = 1.96, include_fit_name = TRUE, cluster = NULL, cluster_config = list(k = 2), group = NULL, rsq = TRUE, mode = "markers", order_on_x = NULL, main = NULL, subtitle = NULL, xlab = NULL, ylab = NULL, alpha = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), axes_square = FALSE, group_names = NULL, font_size = 16, marker_col = NULL, marker_size = 8, symbol = "circle", fit_col = NULL, fit_alpha = 0.8, fit_lwd = 2.5, line_shape = "linear", se_col = NULL, se_alpha = 0.4, scatter_type = "scatter", show_marginal_x = FALSE, show_marginal_y = FALSE, marginal_x = x, marginal_y = y, marginal_x_y = NULL, marginal_y_x = NULL, marginal_col = NULL, marginal_alpha = 0.333, marginal_size = 10, legend = NULL, legend_title = NULL, legend_trace = TRUE, legend_xy = c(0, 0.98), legend_xanchor = "left", legend_yanchor = "auto", legend_orientation = "v", legend_col = NULL, legend_bg = "#FFFFFF00", legend_border_col = "#FFFFFF00", legend_borderwidth = 0, legend_group_gap = 0, x_showspikes = FALSE, y_showspikes = FALSE, spikedash = "solid", spikemode = "across", spikesnap = "hovered data", spikecolor = NULL, spikethickness = 1, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), main_y = 1.01, main_yanchor = "bottom", subtitle_x = 0.02, subtitle_y = 0.99, subtitle_xref = "paper", subtitle_yref = "paper", subtitle_xanchor = "left", subtitle_yanchor = "top", automargin_x = TRUE, automargin_y = TRUE, xlim = NULL, ylim = NULL, axes_equal = FALSE, diagonal = FALSE, diagonal_col = NULL, diagonal_dash = "dot", diagonal_alpha = 0.66, fit_params = NULL, vline = NULL, vline_col = theme[["fg"]], vline_width = 1, vline_dash = "dot", hline = NULL, hline_col = theme[["fg"]], hline_width = 1, hline_dash = "dot", hovertext = NULL, width = NULL, height = NULL, displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 0L ) } \arguments{ \item{x}{Numeric, vector/data.frame/list: x-axis data. If y is NULL and \code{NCOL(x) > 1}, first two columns used as \code{x} and \code{y}, respectively.} \item{y}{Numeric, vector/data.frame/list: y-axis data.} \item{fit}{Character: Fit method.} \item{se_fit}{Logical: If TRUE, include standard error of the fit.} \item{se_times}{Numeric: Multiplier for standard error.} \item{include_fit_name}{Logical: If TRUE, include fit name in legend.} \item{cluster}{Character: Clustering method.} \item{cluster_config}{List: Config for clustering.} \item{group}{Factor: Grouping variable.} \item{rsq}{Logical: If TRUE, print R-squared values in legend if \code{fit} is set.} \item{mode}{Character, vector: "markers", "lines", "markers+lines".} \item{order_on_x}{Logical: If TRUE, order \code{x} and \code{y} on \code{x}.} \item{main}{Character: Main title.} \item{subtitle}{Character: Subtitle.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{alpha}{Numeric: Alpha for markers.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to use.} \item{axes_square}{Logical: If TRUE, draw a square plot.} \item{group_names}{Character: Names for groups.} \item{font_size}{Numeric: Font size.} \item{marker_col}{Color for markers.} \item{marker_size}{Numeric: Marker size.} \item{symbol}{Character: Marker symbol.} \item{fit_col}{Color for fit line.} \item{fit_alpha}{Numeric: Alpha for fit line.} \item{fit_lwd}{Numeric: Line width for fit line.} \item{line_shape}{Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv".} \item{se_col}{Color for standard error band.} \item{se_alpha}{Numeric: Alpha for standard error band.} \item{scatter_type}{Character: Scatter plot type.} \item{show_marginal_x}{Logical: If TRUE, add marginal distribution line markers on x-axis.} \item{show_marginal_y}{Logical: If TRUE, add marginal distribution line markers on y-axis.} \item{marginal_x}{Numeric: Data for marginal distribution on x-axis.} \item{marginal_y}{Numeric: Data for marginal distribution on y-axis.} \item{marginal_x_y}{Numeric: Y position of marginal markers on x-axis.} \item{marginal_y_x}{Numeric: X position of marginal markers on y-axis.} \item{marginal_col}{Color for marginal markers.} \item{marginal_alpha}{Numeric: Alpha for marginal markers.} \item{marginal_size}{Numeric: Size of marginal markers.} \item{legend}{Logical: If TRUE, draw legend.} \item{legend_title}{Character: Title for legend.} \item{legend_trace}{Logical: If TRUE, draw legend trace. (For when you have \code{fit} and don't want a trace for the markers.)} \item{legend_xy}{Numeric: Position of legend.} \item{legend_xanchor}{Character: X anchor for legend.} \item{legend_yanchor}{Character: Y anchor for legend.} \item{legend_orientation}{Character: Orientation of legend.} \item{legend_col}{Color for legend text.} \item{legend_bg}{Color for legend background.} \item{legend_border_col}{Color for legend border.} \item{legend_borderwidth}{Numeric: Border width for legend.} \item{legend_group_gap}{Numeric: Gap between legend groups.} \item{x_showspikes}{Logical: If TRUE, show spikes on x-axis.} \item{y_showspikes}{Logical: If TRUE, show spikes on y-axis.} \item{spikedash}{Character: Dash type for spikes.} \item{spikemode}{Character: Spike mode.} \item{spikesnap}{Character: Spike snap mode.} \item{spikecolor}{Color for spikes.} \item{spikethickness}{Numeric: Thickness of spikes.} \item{margin}{List: Plot margins.} \item{main_y}{Numeric: Y position of main title.} \item{main_yanchor}{Character: Y anchor for main title.} \item{subtitle_x}{Numeric: X position of subtitle.} \item{subtitle_y}{Numeric: Y position of subtitle.} \item{subtitle_xref}{Character: X reference for subtitle.} \item{subtitle_yref}{Character: Y reference for subtitle.} \item{subtitle_xanchor}{Character: X anchor for subtitle.} \item{subtitle_yanchor}{Character: Y anchor for subtitle.} \item{automargin_x}{Logical: If TRUE, automatically adjust x-axis margins.} \item{automargin_y}{Logical: If TRUE, automatically adjust y-axis margins.} \item{xlim}{Numeric: Limits for x-axis.} \item{ylim}{Numeric: Limits for y-axis.} \item{axes_equal}{Logical: If TRUE, set equal scaling for axes.} \item{diagonal}{Logical: If TRUE, add diagonal line.} \item{diagonal_col}{Color for diagonal line.} \item{diagonal_dash}{Character: "solid", "dash", "dot", "dashdot", "longdash", "longdashdot". Dash type for diagonal line.} \item{diagonal_alpha}{Numeric: Alpha for diagonal line.} \item{fit_params}{\code{Hyperparameters} for fit.} \item{vline}{Numeric: X position for vertical line.} \item{vline_col}{Color for vertical line.} \item{vline_width}{Numeric: Width for vertical line.} \item{vline_dash}{Character: Dash type for vertical line.} \item{hline}{Numeric: Y position for horizontal line.} \item{hline_col}{Color for horizontal line.} \item{hline_width}{Numeric: Width for horizontal line.} \item{hline_dash}{Character: Dash type for horizontal line.} \item{hovertext}{List: Hover text for markers.} \item{width}{Numeric: Width of plot.} \item{height}{Numeric: Height of plot.} \item{displayModeBar}{Logical: If TRUE, display mode bar.} \item{modeBar_file_format}{Character: File format for mode bar.} \item{scrollZoom}{Logical: If TRUE, enable scroll zoom.} \item{filename}{Character: Filename to save plot.} \item{file_width}{Numeric: Width of saved file.} \item{file_height}{Numeric: Height of saved file.} \item{file_scale}{Numeric: Scale of saved file.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{plotly} object. } \description{ Draw interactive scatter plots using \code{plotly}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} draw_scatter(iris$Sepal.Length, iris$Petal.Length, fit = "gam", se_fit = TRUE, group = iris$Species ) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_spectrogram.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_spectrogram.R \name{draw_spectrogram} \alias{draw_spectrogram} \title{Interactive Spectrogram} \usage{ draw_spectrogram( x, y, z, colorgrad_n = 101, colors = NULL, xlab = "Time", ylab = "Frequency", zlab = "Power", hover_xlab = xlab, hover_ylab = ylab, hover_zlab = zlab, zmin = NULL, zmax = NULL, zauto = TRUE, hoverlabel_align = "right", colorscale = "Jet", colorbar_y = 0.5, colorbar_yanchor = "middle", colorbar_xpad = 0, colorbar_ypad = 0, colorbar_len = 0.75, colorbar_title_side = "bottom", showgrid = FALSE, space = "rgb", lo = "#18A3AC", lomid = NULL, mid = NULL, midhi = NULL, hi = "#F48024", grid_gap = 0, limits = NULL, main = NULL, key_title = NULL, showticklabels = NULL, theme = choose_theme(getOption("rtemis_theme")), font_size = NULL, padding = 0, displayModeBar = TRUE, modeBar_file_format = "svg", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) } \arguments{ \item{x}{Numeric: Time.} \item{y}{Numeric: Frequency.} \item{z}{Numeric: Power.} \item{colorgrad_n}{Integer: Number of colors in the gradient.} \item{colors}{Character: Custom colors for the gradient.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{zlab}{Character: z-axis label.} \item{hover_xlab}{Character: x-axis label for hover.} \item{hover_ylab}{Character: y-axis label for hover.} \item{hover_zlab}{Character: z-axis label for hover.} \item{zmin}{Numeric: Minimum value for color scale.} \item{zmax}{Numeric: Maximum value for color scale.} \item{zauto}{Logical: If TRUE, automatically set zmin and zmax.} \item{hoverlabel_align}{Character: Alignment of hover labels.} \item{colorscale}{Character: Color scale.} \item{colorbar_y}{Numeric: Y position of colorbar.} \item{colorbar_yanchor}{Character: Y anchor of colorbar.} \item{colorbar_xpad}{Numeric: X padding of colorbar.} \item{colorbar_ypad}{Numeric: Y padding of colorbar.} \item{colorbar_len}{Numeric: Length of colorbar.} \item{colorbar_title_side}{Character: Side of colorbar title.} \item{showgrid}{Logical: If TRUE, show grid.} \item{space}{Character: Color space for gradient.} \item{lo}{Character: Low color for gradient.} \item{lomid}{Character: Low-mid color for gradient.} \item{mid}{Character: Mid color for gradient.} \item{midhi}{Character: Mid-high color for gradient.} \item{hi}{Character: High color for gradient.} \item{grid_gap}{Integer: Space between cells.} \item{limits}{Numeric, length 2: Determine color range. Default = NULL, which automatically centers values around 0.} \item{main}{Character: Main title.} \item{key_title}{Character: Title of the key.} \item{showticklabels}{Logical: If TRUE, show tick labels.} \item{theme}{\code{Theme} object.} \item{font_size}{Numeric: Font size.} \item{padding}{Numeric: Padding between cells.} \item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.} \item{modeBar_file_format}{Character: File format for image exports from the mode bar.} \item{filename}{Character: Filename to save the plot. Default is NULL.} \item{file_width}{Numeric: Width of exported image.} \item{file_height}{Numeric: Height of exported image.} \item{file_scale}{Numeric: Scale of exported image.} \item{...}{Additional arguments to be passed to \code{heatmaply::heatmaply}.} } \value{ \code{plotly} object. } \description{ Draw interactive spectrograms using \code{plotly} } \details{ To set custom colors, use a minimum of \code{lo} and \code{hi}, optionally also \code{lomid}, \code{mid}, \code{midhi} colors and set \code{colorscale = NULL}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Example data time <- seq(0, 10, length.out = 100) freq <- seq(1, 100, length.out = 100) power <- outer(time, freq, function(t, f) sin(t) * cos(f)) draw_spectrogram( x = time, y = freq, z = power ) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_survfit.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_survfit.R \name{draw_survfit} \alias{draw_survfit} \title{Draw a survfit object} \usage{ draw_survfit( x, mode = "lines", symbol = "cross", line_shape = "hv", xlim = NULL, ylim = NULL, xlab = "Time", ylab = "Survival", main = NULL, legend_xy = c(1, 1), legend_xanchor = "right", legend_yanchor = "top", theme = choose_theme(getOption("rtemis_theme")), nrisk_table = FALSE, filename = NULL, ... ) } \arguments{ \item{x}{\code{survfit} object created by \link[survival:survfit]{survival::survfit}.} \item{mode}{Character, vector: "markers", "lines", "markers+lines".} \item{symbol}{Character: Symbol to use for the points.} \item{line_shape}{Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv".} \item{xlim}{Numeric vector of length 2: x-axis limits.} \item{ylim}{Numeric vector of length 2: y-axis limits.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{main}{Character: Main title.} \item{legend_xy}{Numeric: Position of legend.} \item{legend_xanchor}{Character: X anchor for legend.} \item{legend_yanchor}{Character: Y anchor for legend.} \item{theme}{\code{Theme} object.} \item{nrisk_table}{Logical: If \code{TRUE}, subplot a table of the number at risk at each time point.} \item{filename}{Character: Filename to save plot.} \item{...}{Additional arguments passed to \link{draw_scatter}.} } \value{ \code{plotly} object. } \description{ Draw a \code{survfit} object using \link{draw_scatter}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # Get the lung dataset data(cancer, package = "survival") sf1 <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung) draw_survfit(sf1) sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung) draw_survfit(sf2) # with N at risk table draw_survfit(sf2) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_table.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_table.R \name{draw_table} \alias{draw_table} \title{Simple HTML table} \usage{ draw_table( x, .ddSci = TRUE, main = NULL, main_col = "black", main_x = 0, main_xanchor = "auto", fill_col = "#18A3AC", table_bg = "white", bg = "white", line_col = "white", lwd = 1, header_font_col = "white", table_font_col = "gray20", font_size = 14, font_family = "Helvetica Neue", margin = list(l = 0, r = 5, t = 30, b = 0, pad = 0) ) } \arguments{ \item{x}{data.frame: Table to draw} \item{.ddSci}{Logical: If TRUE, apply \link{ddSci} to numeric columns.} \item{main}{Character: Table tile.} \item{main_col}{Color: Title color.} \item{main_x}{Float [0, 1]: Align title: 0: left, .5: center, 1: right.} \item{main_xanchor}{Character: "auto", "left", "right": plotly's layout xanchor for title.} \item{fill_col}{Color: Used to fill header with column names and first column with row names.} \item{table_bg}{Color: Table background.} \item{bg}{Color: Background.} \item{line_col}{Color: Line color.} \item{lwd}{Float: Line width.} \item{header_font_col}{Color: Header font color.} \item{table_font_col}{Color: Table font color.} \item{font_size}{Integer: Font size.} \item{font_family}{Character: Font family.} \item{margin}{List: plotly's margins.} } \value{ \code{plotly} object. } \description{ Draw an html table using \code{plotly} } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} df <- data.frame( Name = c("Alice", "Bob", "Charlie"), Age = c(25, 30, 35), Score = c(90.5, 85.0, 88.0) ) p <- draw_table( df, main = "Sample Table", main_col = "#00b2b2" ) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_ts.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_ts.R \name{draw_ts} \alias{draw_ts} \title{Interactive Timeseries Plots} \usage{ draw_ts( x, time, window = 7L, group = NULL, roll_fn = c("mean", "median", "max", "none"), roll_col = NULL, roll_alpha = 1, roll_lwd = 2, roll_name = NULL, alpha = NULL, align = "center", group_names = NULL, xlab = "Time", n_xticks = 12, scatter_type = "scatter", legend = TRUE, x_showspikes = TRUE, y_showspikes = FALSE, spikedash = "solid", spikemode = "across", spikesnap = "hovered data", spikecolor = NULL, spikethickness = 1, displayModeBar = TRUE, modeBar_file_format = "svg", theme = choose_theme(getOption("rtemis_theme")), palette = getOption("rtemis_palette", "rtms"), filename = NULL, file_width = 500, file_height = 500, file_scale = 1, ... ) } \arguments{ \item{x}{Numeric vector of values to plot or list of vectors} \item{time}{Numeric or Date vector of time corresponding to values of \code{x}} \item{window}{Integer: apply \code{roll_fn} over this many units of time} \item{group}{Factor defining groups} \item{roll_fn}{Character: "mean", "median", "max", or "sum": Function to apply on rolling windows of \code{x}} \item{roll_col}{Color for rolling line} \item{roll_alpha}{Numeric: transparency for rolling line} \item{roll_lwd}{Numeric: width of rolling line} \item{roll_name}{Rolling function name (for annotation)} \item{alpha}{Numeric [0, 1]: Transparency} \item{align}{Character: "center", "right", or "left"} \item{group_names}{Character vector of group names} \item{xlab}{Character: x-axis label} \item{n_xticks}{Integer: number of x-axis ticks to use (approximately)} \item{scatter_type}{Character: "scatter" or "lines"} \item{legend}{Logical: If TRUE, show legend} \item{x_showspikes}{Logical: If TRUE, show x-axis spikes on hover} \item{y_showspikes}{Logical: If TRUE, show y-axis spikes on hover} \item{spikedash}{Character: dash type string ("solid", "dot", "dash", "longdash", "dashdot", or "longdashdot") or a dash length list in px (eg "5px,10px,2px,2px")} \item{spikemode}{Character: If "toaxis", spike line is drawn from the data point to the axis the series is plotted on. If "across", the line is drawn across the entire plot area, and supercedes "toaxis". If "marker", then a marker dot is drawn on the axis the series is plotted on} \item{spikesnap}{Character: "data", "cursor", "hovered data". Determines whether spikelines are stuck to the cursor or to the closest datapoints.} \item{spikecolor}{Color for spike lines} \item{spikethickness}{Numeric: spike line thickness} \item{displayModeBar}{Logical: If TRUE, display plotly's modebar} \item{modeBar_file_format}{Character: modeBar image export file format} \item{theme}{\code{Theme} object.} \item{palette}{Character: palette name, or list of colors} \item{filename}{Character: Path to filename to save plot} \item{file_width}{Numeric: image export width} \item{file_height}{Numeric: image export height} \item{file_scale}{Numeric: image export scale} \item{...}{Additional arguments to be passed to \link{draw_scatter}} } \value{ \code{plotly} object. } \description{ Draw interactive timeseries plots using \code{plotly} } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} time <- sample(seq(as.Date("2020-03-01"), as.Date("2020-09-23"), length.out = 140)) x1 <- rnorm(140) x2 <- rnorm(140, 1, 1.2) # Single timeseries draw_ts(x1, time) # Multiple timeseries input as list draw_ts(list(Alpha = x1, Beta = x2), time) # Multiple timeseries grouped by group, different lengths time1 <- sample(seq(as.Date("2020-03-01"), as.Date("2020-07-23"), length.out = 100)) time2 <- sample(seq(as.Date("2020-05-01"), as.Date("2020-09-23"), length.out = 140)) time <- c(time1, time2) x <- c(rnorm(100), rnorm(140, 1, 1.5)) group <- c(rep("Alpha", 100), rep("Beta", 140)) draw_ts(x, time, 7, group) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_varimp.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_varimp.R \name{draw_varimp} \alias{draw_varimp} \title{Interactive Variable Importance Plot} \usage{ draw_varimp( x, names = NULL, main = NULL, type = c("bar", "line"), xlab = NULL, ylab = NULL, plot_top = 1, orientation = "v", line_width = 12, labelify = TRUE, alpha = 1, palette = get_palette(getOption("rtemis_palette")), mar = NULL, font_size = 16, axis_font_size = 14, theme = choose_theme(getOption("rtemis_theme")), showlegend = TRUE, filename = NULL, file_width = 500, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{Numeric vector (or coercible to numeric): Input.} \item{names}{Vector, string: Names of features.} \item{main}{Character: Main title.} \item{type}{Character: "bar" or "line".} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{plot_top}{Integer: Plot this many top features.} \item{orientation}{Character: "h" or "v".} \item{line_width}{Numeric: Line width.} \item{labelify}{Logical: If TRUE, labelify feature names.} \item{alpha}{Numeric: Transparency.} \item{palette}{Character vector: Colors to use.} \item{mar}{Vector, numeric, length 4: Plot margins in pixels (NOT inches).} \item{font_size}{Integer: Overall font size to use (essentially for the title at this point).} \item{axis_font_size}{Integer: Font size to use for axis labels and tick labels.} \item{theme}{\code{Theme} object.} \item{showlegend}{Logical: If TRUE, show legend.} \item{filename}{Character: Path to save the plot image.} \item{file_width}{Numeric: Width of the saved plot image.} \item{file_height}{Numeric: Height of the saved plot image.} \item{file_scale}{Numeric: Scale of the saved plot image.} } \value{ \code{plotly} object. } \description{ Plot variable importance using \code{plotly} } \details{ A simple \code{plotly} wrapper to plot horizontal barplots, sorted by value, which can be used to visualize variable importance, model coefficients, etc. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # synthetic data x <- rnorm(10) names(x) <- paste0("Feature_", seq(x)) draw_varimp(x) draw_varimp(x, orientation = "h") \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_volcano.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_volcano.R \name{draw_volcano} \alias{draw_volcano} \title{Volcano Plot} \usage{ draw_volcano( x, pvals, xnames = NULL, group = NULL, x_thresh = 0, p_thresh = 0.05, p_adjust_method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"), p_transform = function(x) -log10(x), legend = NULL, legend_lo = NULL, legend_hi = NULL, label_lo = "Low", label_hi = "High", main = NULL, xlab = NULL, ylab = NULL, margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0), xlim = NULL, ylim = NULL, alpha = NULL, hline = NULL, hline_col = NULL, hline_width = 1, hline_dash = "solid", hline_annotate = NULL, hline_annotation_x = 1, theme = choose_theme(getOption("rtemis_theme")), annotate = TRUE, annotate_col = theme[["labs_col"]], font_size = 16, palette = NULL, legend_x_lo = NULL, legend_x_hi = NULL, legend_y = 0.97, annotate_n = 7L, ax_lo = NULL, ay_lo = NULL, ax_hi = NULL, ay_hi = NULL, annotate_alpha = 0.7, hovertext = NULL, displayModeBar = "hover", filename = NULL, file_width = 500, file_height = 500, file_scale = 1, verbosity = 1L, ... ) } \arguments{ \item{x}{Numeric vector: Input values, e.g. log2 fold change, coefficients, etc.} \item{pvals}{Numeric vector: p-values.} \item{xnames}{Character vector: \code{x} names.} \item{group}{Optional factor: Used to color code points. If NULL, significant points below \code{x_thresh}, non-significant points, and significant points above \code{x_thresh} will be plotted with the first, second and third color of \code{palette}.} \item{x_thresh}{Numeric x-axis threshold separating low from high.} \item{p_thresh}{Numeric: p-value threshold of significance.} \item{p_adjust_method}{Character: p-value adjustment method. "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none". Default = "holm". Use "none" for raw p-values.} \item{p_transform}{function.} \item{legend}{Logical: If TRUE, show legend. Will default to FALSE, if \code{group = NULL}, otherwise to TRUE.} \item{legend_lo}{Character: Legend to annotate significant points below the \code{x_thresh}.} \item{legend_hi}{Character: Legend to annotate significant points above the \code{x_thresh}.} \item{label_lo}{Character: label for low values.} \item{label_hi}{Character: label for high values.} \item{main}{Character: Main title.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{margin}{Named list of plot margins. Default = \code{list(b = 65, l = 65, t = 50, r = 10, pad = 0)}.} \item{xlim}{Numeric vector, length 2: x-axis limits.} \item{ylim}{Numeric vector, length 2: y-axis limits.} \item{alpha}{Numeric: point transparency.} \item{hline}{Numeric: If defined, draw a horizontal line at this y value.} \item{hline_col}{Color for \code{hline}.} \item{hline_width}{Numeric: Width for \code{hline}.} \item{hline_dash}{Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot".} \item{hline_annotate}{Character: Text of horizontal line annotation if \code{hline} is set.} \item{hline_annotation_x}{Numeric: x position to place annotation with paper as reference. 0: to the left of the plot area; 1: to the right of the plot area.} \item{theme}{\code{Theme} object.} \item{annotate}{Logical: If TRUE, annotate significant points.} \item{annotate_col}{Color for annotations.} \item{font_size}{Integer: Font size.} \item{palette}{Character vector: Colors to use. If \code{group} is NULL, the first, second and third colors will be used for significant points with negative coefficients, non-significant points, and significant points with positive coefficients, respectively. If \code{group} is not NULL, colors will be assigned to groups, in order of appearance.} \item{legend_x_lo}{Numeric: x position of \code{legend_lo}.} \item{legend_x_hi}{Numeric: x position of \code{legend_hi}.} \item{legend_y}{Numeric: y position for \code{legend_lo} and \code{legend_hi}.} \item{annotate_n}{Integer: Number of significant points to annotate.} \item{ax_lo}{Numeric: Sets the x component of the arrow tail about the arrow head for significant points below \code{x_thresh}.} \item{ay_lo}{Numeric: Sets the y component of the arrow tail about the arrow head for significant points below \code{x_thresh}.} \item{ax_hi}{Numeric: Sets the x component of the arrow tail about the arrow head for significant points above \code{x_thresh}.} \item{ay_hi}{Numeric: Sets the y component of the arrow tail about the arrow head for significant points above \code{x_thresh}.} \item{annotate_alpha}{Numeric: Transparency for annotations.} \item{hovertext}{Character vector: Text to display on hover.} \item{displayModeBar}{Logical: If TRUE, display plotly mode bar.} \item{filename}{Character: Path to save the plot image.} \item{file_width}{Numeric: Width of the saved plot image.} \item{file_height}{Numeric: Height of the saved plot image.} \item{file_scale}{Numeric: Scale of the saved plot image.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Additional arguments passed to \link{draw_scatter}.} } \value{ \code{plotly} object. } \description{ Volcano Plot } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} set.seed(2019) y <- rnormmat(500, 500, return_df = TRUE) x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500)) mod <- massGLM(x, y) draw_volcano(summary(mod)[["Coefficient_x"]], summary(mod)[["p_value_x"]]) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/draw_xt.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/draw_xt.R \name{draw_xt} \alias{draw_xt} \title{Plot timeseries data} \usage{ draw_xt( x, y, x2 = NULL, y2 = NULL, which_xy = NULL, which_xy2 = NULL, shade_bin = NULL, shade_interval = NULL, shade_col = NULL, shade_x = NULL, shade_name = "", shade_showlegend = FALSE, ynames = NULL, y2names = NULL, xlab = NULL, ylab = NULL, y2lab = NULL, xunits = NULL, yunits = NULL, y2units = NULL, yunits_col = NULL, y2units_col = NULL, zt = NULL, show_zt = TRUE, show_zt_every = NULL, zt_nticks = 18L, main = NULL, main_y = 1, main_yanchor = "bottom", x_nticks = 0, y_nticks = 0, show_rangeslider = NULL, slider_start = NULL, slider_end = NULL, theme = choose_theme(getOption("rtemis_theme")), palette = get_palette(getOption("rtemis_palette")), font_size = 16, yfill = "none", y2fill = "none", fill_alpha = 0.2, yline_width = 2, y2line_width = 2, x_showspikes = TRUE, spike_dash = "solid", spike_col = NULL, x_spike_thickness = -2, tickfont_size = 16, x_tickmode = "auto", x_tickvals = NULL, x_ticktext = NULL, x_tickangle = NULL, legend_x = 0, legend_y = 1.1, legend_xanchor = "left", legend_yanchor = "top", legend_orientation = "h", margin = list(l = 75, r = 75, b = 75, t = 75), x_standoff = 20L, y_standoff = 20L, y2_standoff = 20L, hovermode = "x", displayModeBar = TRUE, modeBar_file_format = "svg", scrollZoom = TRUE, filename = NULL, file_width = 960, file_height = 500, file_scale = 1 ) } \arguments{ \item{x}{Datetime vector or list of vectors.} \item{y}{Numeric vector or named list of vectors: y-axis data.} \item{x2}{Datetime vector or list of vectors, optional: must be provided if \code{y2} does not correspond to values in \code{x}. A single x-axis will be drawn for all values in \code{x} and \code{x2}.} \item{y2}{Numeric vector, optional: If provided, a second y-axis will be added to the right side of the plot.} \item{which_xy}{Integer vector: Indices of \code{x} and \code{y} to plot. If not provided, will select up to the first two x-y traces.} \item{which_xy2}{Integer vector: Indices of \code{x2} and \code{y2} to plot. If not provided, will select up to the first two x2-y2 traces.} \item{shade_bin}{Integer vector \{0, 1\}: Time points in \code{x} to shade on the plot. For example, if there are 10 time points in \code{x}, and you want to shade time points 3 to 7, \code{shade_bin = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 0)}. Only set \code{shade_bin} or \code{shade_interval}, not both.} \item{shade_interval}{List of numeric vectors: Intervals to shade on the plot. Only set \code{shade_bin} or \code{shade_interval}, not both.} \item{shade_col}{Color: Color to shade intervals.} \item{shade_x}{Numeric vector: x-values to use for shading.} \item{shade_name}{Character: Name for shaded intervals.} \item{shade_showlegend}{Logical: If TRUE, show legend for shaded intervals.} \item{ynames}{Character vector, optional: Names for each vector in \code{y}.} \item{y2names}{Character vector, optional: Names for each vector in \code{y2}.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{y2lab}{Character: y2-axis label.} \item{xunits}{Character: x-axis units.} \item{yunits}{Character: y-axis units.} \item{y2units}{Character: y2-axis units.} \item{yunits_col}{Color for y-axis units.} \item{y2units_col}{Color for y2-axis units.} \item{zt}{Numeric vector: Zeitgeber time. If provided, will be shown on the x-axis instead of \code{x}. To be used only with a single \code{x} vector and no \code{x2}.} \item{show_zt}{Logical: If TRUE, show zt on x-axis, if zt is provided.} \item{show_zt_every}{Optional integer: Show zt every \code{show_zt_every} ticks. If NULL, will be calculated to be \code{x_nticks} +/- 1 if \code{x_nticks} is not 0, otherwise 12 +/- 1.} \item{zt_nticks}{Integer: Number of zt ticks to show. Only used if \code{show_zt_every} is NULL. The actual number of ticks shown will depend on the periodicity of zt, so that zt = 0 is always included.} \item{main}{Character: Main title.} \item{main_y}{Numeric: Y position of main title.} \item{main_yanchor}{Character: "top", "middle", "bottom".} \item{x_nticks}{Integer: Number of ticks on x-axis.} \item{y_nticks}{Integer: Number of ticks on y-axis.} \item{show_rangeslider}{Logical: If TRUE, show a range slider.} \item{slider_start}{Numeric: Start of range slider.} \item{slider_end}{Numeric: End of range slider.} \item{theme}{\code{Theme} object.} \item{palette}{Character vector: Colors to be used to draw each vector in \code{y} and \code{y2}, in order.} \item{font_size}{Numeric: Font size for text.} \item{yfill}{Character: Fill type for y-axis: "none", "tozeroy", "tonexty".} \item{y2fill}{Character: Fill type for y2-axis: "none", "tozeroy", "tonexty".} \item{fill_alpha}{Numeric: Fill opacity for y-axis.} \item{yline_width}{Numeric: Line width for y-axis lines.} \item{y2line_width}{Numeric: Line width for y2-axis lines.} \item{x_showspikes}{Logical: If TRUE, show spikes on x-axis.} \item{spike_dash}{Character: Dash type for spikes: "solid", "dot", "dash", "longdash", "dashdot", "longdashdot".} \item{spike_col}{Color for spikes.} \item{x_spike_thickness}{Numeric: Thickness of spikes. \code{-2} avoids drawing border around spikes.} \item{tickfont_size}{Numeric: Font size for tick labels.} \item{x_tickmode}{Character: "auto", "linear", "array".} \item{x_tickvals}{Numeric vector: Tick positions.} \item{x_ticktext}{Character vector: Tick labels.} \item{x_tickangle}{Numeric: Angle of tick labels.} \item{legend_x}{Numeric: X position of legend.} \item{legend_y}{Numeric: Y position of legend.} \item{legend_xanchor}{Character: "left", "center", "right".} \item{legend_yanchor}{Character: "top", "middle", "bottom".} \item{legend_orientation}{Character: "v" for vertical, "h" for horizontal.} \item{margin}{Named list with 4 numeric values: "l", "r", "t", "b" for left, right, top, bottom margins.} \item{x_standoff}{Numeric: Distance from x-axis to x-axis label.} \item{y_standoff}{Numeric: Distance from y-axis to y-axis label.} \item{y2_standoff}{Numeric: Distance from y2-axis to y2-axis label.} \item{hovermode}{Character: "closest", "x", "x unified".} \item{displayModeBar}{Logical: If TRUE, display plotly mode bar.} \item{modeBar_file_format}{Character: "png", "svg", "jpeg", "webp", "pdf": file format for mode bar image export.} \item{scrollZoom}{Logical: If TRUE, enable zooming by scrolling.} \item{filename}{Character: Path to save the plot image.} \item{file_width}{Numeric: Width of the saved plot image.} \item{file_height}{Numeric: Height of the saved plot image.} \item{file_scale}{Numeric: Scale of the saved plot image.} } \value{ \code{plotly} object. } \description{ Plot timeseries data } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} datetime <- seq( as.POSIXct("2020-01-01 00:00"), as.POSIXct("2020-01-02 00:00"), by = "hour" ) df <- data.frame( datetime = datetime, value1 = rnorm(length(datetime)), value2 = rnorm(length(datetime)) ) draw_xt(df, x = df[, 1], y = df[, 2:3]) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/dt_describe.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_describe} \alias{dt_describe} \title{Describe data.table} \usage{ dt_describe(x, verbosity = 1L) } \arguments{ \item{x}{data.table: Input data.table.} \item{verbosity}{Integer: If > 0, print output to console.} } \value{ List with three data.tables: Numeric, Categorical, and Date. } \description{ Describe data.table } \examples{ library(data.table) origin <- as.POSIXct("2022-01-01 00:00:00", tz = "America/Los_Angeles") x <- data.table( ID = paste0("ID", 1:10), V1 = rnorm(10), V2 = rnorm(10, 20, 3), V1_datetime = as.POSIXct( seq( 1, 1e7, length.out = 10 ), origin = origin ), V2_datetime = as.POSIXct( seq( 1, 1e7, length.out = 10 ), origin = origin ), C1 = sample(c("alpha", "beta", "gamma"), 10, TRUE), F1 = factor(sample(c("delta", "epsilon", "zeta"), 10, TRUE)) ) } \author{ EDG } ================================================ FILE: man/dt_inspect_types.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_inspect_types} \alias{dt_inspect_types} \title{Inspect column types} \usage{ dt_inspect_types(x, cols = NULL, verbosity = 1L) } \arguments{ \item{x}{data.table: Input data.table.} \item{cols}{Character vector: columns to inspect.} \item{verbosity}{Integer: Verbosity level.} } \value{ Character vector. } \description{ Will attempt to identify columns that should be numeric but are either character or factor by running \link{inspect_type} on each column. } \examples{ library(data.table) x <- data.table( id = 8001:8006, a = c("3", "5", "undefined", "21", "4", NA), b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), c = c(1, 2, 3, 4, 5, 6) ) dt_inspect_types(x) } \author{ EDG } ================================================ FILE: man/dt_keybin_reshape.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_keybin_reshape} \alias{dt_keybin_reshape} \title{Long to wide key-value reshaping} \usage{ dt_keybin_reshape( x, id_name, key_name, positive = 1, negative = 0, xname = NULL, verbosity = 1L ) } \arguments{ \item{x}{\code{data.table} object.} \item{id_name}{Character: Name of column in \code{x} that defines the IDs identifying individual rows.} \item{key_name}{Character: Name of column in \code{x} that holds the key.} \item{positive}{Numeric or Character: Used to fill id ~ key combination present in the long format input \code{x}.} \item{negative}{Numeric or Character: Used to fill id ~ key combination NOT present in the long format input \code{x}.} \item{xname}{Character: Name of \code{x} to be used in messages.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{data.table} in wide format. } \description{ Reshape a long format \code{data.table} using key-value pairs with \code{data.table::dcast} } \examples{ library(data.table) x <- data.table( ID = rep(1:3, each = 2), Dx = c("A", "C", "B", "C", "D", "A") ) dt_keybin_reshape(x, id_name = "ID", key_name = "Dx") } \author{ EDG } ================================================ FILE: man/dt_merge.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_merge} \alias{dt_merge} \title{Merge data.tables} \usage{ dt_merge( left, right, on = NULL, left_on = NULL, right_on = NULL, how = "left", left_name = NULL, right_name = NULL, left_suffix = NULL, right_suffix = NULL, verbosity = 1L, ... ) } \arguments{ \item{left}{data.table} \item{right}{data.table} \item{on}{Character: Name of column to join on.} \item{left_on}{Character: Name of column on left table.} \item{right_on}{Character: Name of column on right table.} \item{how}{Character: Type of join: "inner", "left", "right", "outer".} \item{left_name}{Character: Name of left table.} \item{right_name}{Character: Name of right table.} \item{left_suffix}{Character: If provided, add this suffix to all left column names, excluding on/left_on.} \item{right_suffix}{Character: If provided, add this suffix to all right column names, excluding on/right_on.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Additional arguments to be passed to \code{data.table::merge}.} } \value{ Merged data.table. } \description{ Merge data.tables } \examples{ library(data.table) xleft <- data.table(ID = 1:5, Alpha = letters[1:5]) xright <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6]) xlr_inner <- dt_merge(xleft, xright, on = "ID", how = "inner") } \author{ EDG } ================================================ FILE: man/dt_names_by_attr.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_names_by_attr} \alias{dt_names_by_attr} \title{List column names by attribute} \usage{ dt_names_by_attr(x, attribute, exact = TRUE, sorted = TRUE) } \arguments{ \item{x}{data.table: Input data.table.} \item{attribute}{Character: name of attribute.} \item{exact}{Logical: If TRUE, use exact matching.} \item{sorted}{Logical: If TRUE, sort the output.} } \value{ Character vector. } \description{ List column names by attribute } \examples{ library(data.table) x <- data.table( id = 1:5, sbp = rnorm(5, 120, 15), dbp = rnorm(5, 80, 10), paO2 = rnorm(5, 90, 10), paCO2 = rnorm(5, 40, 5) ) setattr(x[["id"]], "source", "demographics") setattr(x[["sbp"]], "source", "outpatient") setattr(x[["dbp"]], "source", "outpatient") setattr(x[["paO2"]], "source", "icu") setattr(x[["paCO2"]], "source", "icu") dt_names_by_attr(x, "source", "outpatient") } \author{ EDG } ================================================ FILE: man/dt_nunique_perfeat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_nunique_perfeat} \alias{dt_nunique_perfeat} \title{Number of unique values per feature} \usage{ dt_nunique_perfeat(x, excludeNA = FALSE, limit = 20L, verbosity = 1L) } \arguments{ \item{x}{data.table: Input data.table.} \item{excludeNA}{Logical: If TRUE, exclude NA values.} \item{limit}{Integer: Print up to this many features. Set to -1L to print all.} \item{verbosity}{Integer: If > 0, print output to console.} } \value{ Named integer vector of length \code{NCOL(x)} with number of unique values per column/feature, invisibly. } \description{ Number of unique values per feature } \examples{ library(data.table) ir <- as.data.table(iris) dt_nunique_perfeat(ir) } \author{ EDG } ================================================ FILE: man/dt_pctmatch.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_pctmatch} \alias{dt_pctmatch} \title{Get N and percent match of values between two columns of two data.tables} \usage{ dt_pctmatch(x, y, on = NULL, left_on = NULL, right_on = NULL, verbosity = 1L) } \arguments{ \item{x}{data.table: First input data.table.} \item{y}{data.table: Second input data.table.} \item{on}{Integer or character: column to read in \code{x} and \code{y}, if it is the same} \item{left_on}{Integer or character: column to read in \code{x}} \item{right_on}{Integer or character: column to read in \code{y}} \item{verbosity}{Integer: Verbosity level.} } \value{ list. } \description{ Get N and percent match of values between two columns of two data.tables } \examples{ library(data.table) x <- data.table(ID = 1:5, Alpha = letters[1:5]) y <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6]) dt_pctmatch(x, y, on = "ID") } \author{ EDG } ================================================ FILE: man/dt_pctmissing.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_pctmissing} \alias{dt_pctmissing} \title{Get percent of missing values from every column} \usage{ dt_pctmissing(x, verbosity = 1L) } \arguments{ \item{x}{data.frame or data.table} \item{verbosity}{Integer: Verbosity level.} } \value{ list } \description{ Get percent of missing values from every column } \examples{ library(data.table) x <- data.table(a = c(1, 2, NA, 4), b = c(NA, NA, 3, 4), c = c("A", "B", "C", NA)) dt_pctmissing(x) } \author{ EDG } ================================================ FILE: man/dt_set_autotypes.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_set_autotypes} \alias{dt_set_autotypes} \title{Set column types automatically} \usage{ dt_set_autotypes(x, cols = NULL, verbosity = 1L) } \arguments{ \item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}, if needed.} \item{cols}{Character vector: columns to work on. If not defined, will work on all columns} \item{verbosity}{Integer: Verbosity level.} } \value{ data.table, invisibly. } \description{ This function inspects a data.table and attempts to identify columns that should be numeric but have been read in as character, and fixes their type \emph{\strong{in-place}}. This can happen when one or more fields contain non-numeric characters, for example. } \examples{ library(data.table) x <- data.table( id = 8001:8006, a = c("3", "5", "undefined", "21", "4", NA), b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), c = c(1, 2, 3, 4, 5, 6) ) str(x) # ***in-place*** operation means no assignment is needed dt_set_autotypes(x) str(x) # Try excluding column 'a' from autotyping x <- data.table( id = 8001:8006, a = c("3", "5", "undefined", "21", "4", NA), b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"), c = c(1, 2, 3, 4, 5, 6) ) str(x) # exclude column 'a' from autotyping dt_set_autotypes(x, cols = setdiff(names(x), "a")) str(x) } \author{ EDG } ================================================ FILE: man/dt_set_clean_all.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_set_clean_all} \alias{dt_set_clean_all} \title{Clean column names and factor levels \emph{\strong{in-place}}} \usage{ dt_set_clean_all(x, prefix_digits = NA) } \arguments{ \item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}, if needed.} \item{prefix_digits}{Character: prefix to add to names beginning with a digit. Set to NA to skip} } \value{ Nothing, modifies \code{x} \emph{\strong{in-place}}. } \description{ Clean column names and factor levels \emph{\strong{in-place}} } \examples{ library(data.table) x <- as.data.table(iris) levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris") names(x) levels(x[["Species"]]) # ***in-place*** operation means no assignment is needed dt_set_clean_all(x) names(x) levels(x[["Species"]]) } \author{ EDG } ================================================ FILE: man/dt_set_cleanfactorlevels.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_set_cleanfactorlevels} \alias{dt_set_cleanfactorlevels} \title{Clean factor levels of data.table \emph{\strong{in-place}}} \usage{ dt_set_cleanfactorlevels(x, prefix_digits = NA) } \arguments{ \item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.} \item{prefix_digits}{Character: If not NA, add this prefix to all factor levels that are numbers} } \value{ Nothing, modifies \code{x} \emph{\strong{in-place}}. } \description{ Finds all factors in a data.table and cleans factor levels to include only underscore symbols } \examples{ library(data.table) x <- as.data.table(iris) levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris") levels(x[["Species"]]) dt_set_cleanfactorlevels(x) levels(x[["Species"]]) } \author{ EDG } ================================================ FILE: man/dt_set_logical2factor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.table.R \name{dt_set_logical2factor} \alias{dt_set_logical2factor} \title{Convert data.table logical columns to factors} \usage{ dt_set_logical2factor( x, cols = NULL, labels = c("False", "True"), maintain_attributes = TRUE, fillNA = NULL ) } \arguments{ \item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.} \item{cols}{Optional Integer or character: columns to convert. If NULL, operates on all logical columns.} \item{labels}{Character: labels for factor levels.} \item{maintain_attributes}{Logical: If TRUE, maintain column attributes.} \item{fillNA}{Optional Character: If not NULL, fill NA values with this constant.} } \value{ data.table, invisibly. } \description{ Convert data.table logical columns to factors with custom labels \emph{\strong{in-place}} } \examples{ library(data.table) x <- data.table(a = 1:5, b = c(TRUE, FALSE, FALSE, FALSE, TRUE)) x dt_set_logical2factor(x) x z <- data.table( alpha = 1:5, beta = c(TRUE, FALSE, TRUE, NA, TRUE), gamma = c(FALSE, FALSE, TRUE, FALSE, NA) ) # You can usee fillNA to fill NA values with a constant dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"), fillNA = "No") z w <- data.table(mango = 1:5, banana = c(FALSE, FALSE, TRUE, TRUE, FALSE)) w dt_set_logical2factor(w, cols = 2, labels = c("Ugh", "Huh")) w # Column attributes are maintained by default: z <- data.table( alpha = 1:5, beta = c(TRUE, FALSE, TRUE, NA, TRUE), gamma = c(FALSE, FALSE, TRUE, FALSE, NA) ) for (i in seq_along(z)) setattr(z[[i]], "source", "Guava") str(z) dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes")) str(z) } \author{ EDG } ================================================ FILE: man/dt_set_one_hot.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/preprocess.R \name{dt_set_one_hot} \alias{dt_set_one_hot} \title{Convert data.table's factor to one-hot encoding \emph{\strong{in-place}}} \usage{ dt_set_one_hot(x, xname = NULL, verbosity = 1L) } \arguments{ \item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.} \item{xname}{Character, optional: Dataset name.} \item{verbosity}{Integer: Verbosity level.} } \value{ The input, invisibly, after it has been modified \emph{\strong{in-place}}. } \description{ Convert data.table's factor to one-hot encoding \emph{\strong{in-place}} } \examples{ ir <- data.table::as.data.table(iris) # dt_set_one_hot operates ***in-place***; therefore no assignment is used: dt_set_one_hot(ir) ir } \author{ EDG } ================================================ FILE: man/exc.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{exc} \alias{exc} \title{Exclude columns by character or numeric vector.} \usage{ exc(x, idx) } \arguments{ \item{x}{tabular data.} \item{idx}{Character or numeric vector: Column names or indices to exclude.} } \value{ data.frame, tibble, or data.table. } \description{ Exclude columns by character or numeric vector. } \examples{ exc(iris, "Species") |> head() exc(iris, c(1, 3)) |> head() } \author{ EDG } ================================================ FILE: man/feature_matrix.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/preprocess.R \name{feature_matrix} \alias{feature_matrix} \title{Convert tabular data to feature matrix} \usage{ feature_matrix(x) } \arguments{ \item{x}{tabular data: Input data to convert to a feature matrix.} } \value{ Matrix with features. Factors are one-hot encoded, if present. } \description{ Convert a tabular dataset to a matrix, one-hot encoding factors, if present. } \details{ This is a convenience function that uses \code{\link[=features]{features()}}, \code{\link[=preprocess]{preprocess()}}, \code{as.matrix()}. } \examples{ # reorder columns so that we have a categorical feature x <- set_outcome(iris, "Sepal.Length") feature_matrix(x) |> head() } \author{ EDG } ================================================ FILE: man/feature_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{feature_names} \alias{feature_names} \title{Get feature names} \usage{ feature_names(x) } \arguments{ \item{x}{tabular data.} } \value{ Character vector of feature names. } \description{ Returns all column names except the last one } \details{ This applied to tabular datasets used for supervised learning in rtemis, where, by convention, the last column is the outcome variable and all other columns are features. } \examples{ feature_names(iris) } \author{ EDG } ================================================ FILE: man/features.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{features} \alias{features} \title{Get features from tabular data} \usage{ features(x) } \arguments{ \item{x}{tabular data: Input data to get features from.} } \value{ Object of the same class as the input, after removing the last column. } \description{ Returns all columns except the last one. } \details{ This can be applied to tabular datasets used for supervised learning in \pkg{rtemis}, where, by convention, the last column is the outcome variable and all other columns are features. } \examples{ features(iris) |> head() } \author{ EDG } ================================================ FILE: man/get_factor_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{get_factor_names} \alias{get_factor_names} \title{Get factor names} \usage{ get_factor_names(x) } \arguments{ \item{x}{tabular data.} } \value{ Character vector of factor names. } \description{ Get factor names } \details{ This applied to tabular datasets used for supervised learning in rtemis, where, by convention, the last column is the outcome variable and all other columns are features. } \examples{ get_factor_names(iris) } \author{ EDG } ================================================ FILE: man/get_mode.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{get_mode} \alias{get_mode} \title{Get the mode of a factor or integer} \usage{ get_mode(x, na.rm = TRUE, getlast = TRUE, retain_class = TRUE) } \arguments{ \item{x}{Vector, factor or integer: Input data.} \item{na.rm}{Logical: If TRUE, exclude NAs (using \code{na.exclude(x)}).} \item{getlast}{Logical: If TRUE, get the last value in case of ties.} \item{retain_class}{Logical: If TRUE, output is always same class as input.} } \value{ The mode of \code{x} } \description{ Returns the mode of a factor or integer } \examples{ x <- c(9, 3, 4, 4, 0, 2, 2, NA) get_mode(x) x <- c(9, 3, 2, 2, 0, 4, 4, NA) get_mode(x) get_mode(x, getlast = FALSE) } \author{ EDG } ================================================ FILE: man/get_msg_sink.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/msg.R \name{get_msg_sink} \alias{get_msg_sink} \title{Get the current rtemis message sink} \usage{ get_msg_sink() } \value{ The currently registered sink function, or \code{NULL} if none is set. } \description{ Get the current rtemis message sink } \seealso{ \code{\link[=set_msg_sink]{set_msg_sink()}}, \code{\link[=with_msg_sink]{with_msg_sink()}}. } \author{ EDG } ================================================ FILE: man/get_palette.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_palettes.R \name{get_palette} \alias{get_palette} \title{Get Color Palette} \usage{ get_palette(palette = NULL, verbosity = 1L) } \arguments{ \item{palette}{Character: Name of palette to return. Default = NULL: available palette names are printed and no palette is returned.} \item{verbosity}{Integer: Verbosity level.} } \value{ Character vector of colors for the specified palette, or invisibly returns list of available palettes if \code{palette = NULL}. } \description{ \code{get_palette()} returns a color palette (character vector of colors). Without arguments, prints names of available color palettes. Each palette is a named list of hexadecimal color definitions which can be used with any graphics function. } \examples{ # Print available palettes get_palette() # Get the Imperial palette get_palette("imperial") } \author{ EDG } ================================================ FILE: man/getnames.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_df.R \name{getnames} \alias{getnames} \alias{getfactornames} \alias{getnumericnames} \alias{getlogicalnames} \alias{getcharacternames} \alias{getdatenames} \title{Get names by string matching or class} \usage{ getnames( x, pattern = NULL, starts_with = NULL, ends_with = NULL, ignore_case = TRUE ) getfactornames(x) getnumericnames(x) getlogicalnames(x) getcharacternames(x) getdatenames(x) } \arguments{ \item{x}{object with \code{names()} method.} \item{pattern}{Character: pattern to match anywhere in names of x.} \item{starts_with}{Character: pattern to match in the beginning of names of x.} \item{ends_with}{Character: pattern to match at the end of names of x.} \item{ignore_case}{Logical: If TRUE, well, ignore case.} } \value{ Character vector of matched names. } \description{ Get names by string matching or class } \details{ For \code{getnames()} only: \code{pattern}, \code{starts_with}, and \code{ends_with} are applied sequentially. If more than one is provided, the result will be the intersection of all matches. } \examples{ getnames(iris, starts_with = "Sepal") getnames(iris, ends_with = "Width") getfactornames(iris) getnumericnames(iris) } \author{ EDG } ================================================ FILE: man/getnamesandtypes.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_df.R \name{getnamesandtypes} \alias{getnamesandtypes} \title{Get data.frame names and types} \usage{ getnamesandtypes(x) } \arguments{ \item{x}{data.frame / data.table or similar} } \value{ character vector of column names with attribute "type" holding the class of each column } \description{ Get data.frame names and types } \examples{ getnamesandtypes(iris) } \author{ EDG } ================================================ FILE: man/grapes-BC-grapes.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/preprocess.R \name{\%BC\%} \alias{\%BC\%} \title{Binary matrix times character vector} \usage{ x \%BC\% labels } \arguments{ \item{x}{A binary matrix or data.frame} \item{labels}{Character vector length equal to \code{ncol(x)}} } \value{ a character vector } \description{ Binary matrix times character vector } \author{ EDG } ================================================ FILE: man/inc.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{inc} \alias{inc} \title{Select (include) columns by character or numeric vector.} \usage{ inc(x, idx) } \arguments{ \item{x}{tabular data.} \item{idx}{Character or numeric vector: Column names or indices to include.} } \value{ data.frame, tibble, or data.table. } \description{ Select (include) columns by character or numeric vector. } \examples{ inc(iris, c(3, 4)) |> head() inc(iris, c("Sepal.Length", "Species")) |> head() } \author{ EDG } ================================================ FILE: man/index_col_by_attr.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.R \name{index_col_by_attr} \alias{index_col_by_attr} \title{Index columns by attribute name & value} \usage{ index_col_by_attr(x, name, value, exact = TRUE) } \arguments{ \item{x}{tabular data.} \item{name}{Character: Name of attribute.} \item{value}{Character: Value of attribute.} \item{exact}{Logical: Passed to \code{attr} when retrieving attribute value. If \code{TRUE}, attribute name must match \code{name} exactly, otherwise, partial match is allowed.} } \value{ Integer vector. } \description{ Index columns by attribute name & value } \examples{ library(data.table) x <- data.table( id = 1:5, sbp = rnorm(5, 120, 15), dbp = rnorm(5, 80, 10), paO2 = rnorm(5, 90, 10), paCO2 = rnorm(5, 40, 5) ) setattr(x[["sbp"]], "source", "outpatient") setattr(x[["dbp"]], "source", "outpatient") setattr(x[["paO2"]], "source", "icu") setattr(x[["paCO2"]], "source", "icu") index_col_by_attr(x, "source", "icu") } \author{ EDG } ================================================ FILE: man/init_project_dir.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{init_project_dir} \alias{init_project_dir} \title{Initialize Project Directory} \usage{ init_project_dir(path, output_dir = "Out", verbosity = 1L) } \arguments{ \item{path}{Character: Path to initialize project directory in.} \item{output_dir}{Character: Name of output directory to create.} \item{verbosity}{Integer: Verbosity level.} } \value{ Character: the path where the project directory was initialized, invisibly. } \description{ Initializes Directory Structure: "R", "Data", "Results" } \examples{ \dontrun{ # Will create "my_project" directory with init_project_dir("my_project") } } \author{ EDG } ================================================ FILE: man/inspect.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{inspect} \alias{inspect} \title{Inspect rtemis object} \usage{ inspect(x) } \arguments{ \item{x}{R object to inspect.} } \value{ Called for side effect of printing information to console; returns character string invisibly. } \description{ Inspect rtemis object } \examples{ inspect(iris) } \author{ EDG } ================================================ FILE: man/inspect_type.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.R \name{inspect_type} \alias{inspect_type} \title{Inspect character and factor vector} \usage{ inspect_type(x, xname = NULL, verbosity = 1L, thresh = 0.5, na.omit = TRUE) } \arguments{ \item{x}{Character or factor vector.} \item{xname}{Character: Name of input vector \code{x}.} \item{verbosity}{Integer: Verbosity level.} \item{thresh}{Numeric: Threshold for determining whether to convert to numeric.} \item{na.omit}{Logical: If TRUE, remove NA values before checking.} } \value{ Character. } \description{ Checks character or factor vector to determine whether it might be best to convert to numeric. } \details{ All data can be represented as a character string. A numeric variable may be read as a character variable if there are non-numeric characters in the data. It is important to be able to automatically detect such variables and convert them, which would mean introducing NA values. } \examples{ x <- c("3", "5", "undefined", "21", "4", NA) inspect_type(x) z <- c("mango", "banana", "tangerine", NA) inspect_type(z) } \author{ EDG } ================================================ FILE: man/is_constant.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{is_constant} \alias{is_constant} \title{Check if vector is constant} \usage{ is_constant(x, skip_missing = FALSE) } \arguments{ \item{x}{Vector: Input} \item{skip_missing}{Logical: If TRUE, skip NA values before test} } \value{ Logical. } \description{ Check if vector is constant } \examples{ x <- rep(9, 1000000) is_constant(x) x[10] <- NA is_constant(x) is_constant(x, skip_missing = TRUE) } \author{ EDG } ================================================ FILE: man/labelify.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_strings.R \name{labelify} \alias{labelify} \title{Format text for label printing} \usage{ labelify( x, underscores_to_spaces = TRUE, dotsToSpaces = TRUE, toLower = FALSE, toTitleCase = TRUE, capitalize_strings = c("id"), stringsToSpaces = c("\\\\$", "`") ) } \arguments{ \item{x}{Character: Input} \item{underscores_to_spaces}{Logical: If TRUE, convert underscores to spaces.} \item{dotsToSpaces}{Logical: If TRUE, convert dots to spaces.} \item{toLower}{Logical: If TRUE, convert to lowercase (precedes \code{toTitleCase}). Default = FALSE (Good for getting all-caps words converted to title case, bad for abbreviations you want to keep all-caps)} \item{toTitleCase}{Logical: If TRUE, convert to Title Case. Default = TRUE (This does not change all-caps words, set \code{toLower} to TRUE if desired)} \item{capitalize_strings}{Character, vector: Always capitalize these strings, if present. Default = \code{"id"}} \item{stringsToSpaces}{Character, vector: Replace these strings with spaces. Escape as needed for \code{gsub}. Default = \code{"\\\\$"}, which formats common input of the type \code{data.frame$variable}} } \value{ Character vector. } \description{ Format text for label printing } \examples{ x <- c("county_name", "total.cost$", "age", "weight.kg") labelify(x) } \author{ EDG } ================================================ FILE: man/massGLM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/massGLM.R \name{massGLM} \alias{massGLM} \title{Mass-univariate GLM Analysis} \usage{ massGLM(x, y, scale_y = NULL, center_y = NULL, verbosity = 1L) } \arguments{ \item{x}{tabular data: Predictor variables. Usually a small number of covariates.} \item{y}{data.frame or similar: Each column is a different outcome. The function will train one GLM for each column of \code{y}. Usually a large number of features.} \item{scale_y}{Logical: If TRUE, scale each column of \code{y} to have mean 0 and sd 1. If \code{NULL}, defaults to TRUE if \code{y} is numeric, FALSE otherwise.} \item{center_y}{Logical: If TRUE, center each column of \code{y} to have mean 0. If \code{NULL}, defaults to TRUE if \code{scale_y} is TRUE, FALSE otherwise.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{MassGLM} object. } \description{ Mass-univariate GLM Analysis } \examples{ set.seed(2022) y <- rnormmat(500, 40, return_df = TRUE) x <- data.frame( x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500), x2 = y[[21]] + rnorm(500) ) massmod <- massGLM(x, y) # Print table of coefficients, p-values, etc. for all models summary(massmod) } \author{ EDG } ================================================ FILE: man/matchcases.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.R \name{matchcases} \alias{matchcases} \title{Match cases by covariates} \usage{ matchcases( target, pool, n_matches = 1, target_id = NULL, pool_id = NULL, exactmatch_factors = TRUE, exactmatch_cols = NULL, distmatch_cols = NULL, norepeats = TRUE, ignore_na = FALSE, verbosity = 1L ) } \arguments{ \item{target}{data.frame you are matching against.} \item{pool}{data.frame you are looking for matches from.} \item{n_matches}{Integer: Number of matches to return.} \item{target_id}{Character: Column name in \code{target} that holds unique cases IDs. Default = NULL, in which case integer case numbers will be used.} \item{pool_id}{Character: Same as \code{target_id} for \code{pool}.} \item{exactmatch_factors}{Logical: If TRUE, selected cases will have to exactly match factors available in \code{target}.} \item{exactmatch_cols}{Character: Names of columns that should be matched exactly.} \item{distmatch_cols}{Character: Names of columns that should be distance-matched.} \item{norepeats}{Logical: If TRUE, cases in \code{pool} can only be chosen once.} \item{ignore_na}{Logical: If TRUE, ignore NA values during exact matching.} \item{verbosity}{Integer: Verbosity level.} } \value{ data.frame } \description{ Find one or more cases from a \code{pool} data.frame that match cases in a target data.frame. Match exactly and/or by distance (sum of squared distances). } \examples{ set.seed(2021) cases <- data.frame( PID = paste0("PID", seq(4)), Sex = factor(c(1, 1, 0, 0)), Handedness = factor(c(1, 1, 0, 1)), Age = c(21, 27, 39, 24), Var = c(.7, .8, .9, .6), Varx = rnorm(4) ) controls <- data.frame( CID = paste0("CID", seq(50)), Sex = factor(sample(c(0, 1), 50, TRUE)), Handedness = factor(sample(c(0, 1), 50, TRUE, c(.1, .9))), Age = sample(16:42, 50, TRUE), Var = rnorm(50), Vary = rnorm(50) ) mc <- matchcases(cases, controls, 2, "PID", "CID") } \author{ EDG } ================================================ FILE: man/mgetnames.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_df.R \name{mgetnames} \alias{mgetnames} \title{Get names by string matching multiple patterns} \usage{ mgetnames( x, pattern = NULL, starts_with = NULL, ends_with = NULL, ignore_case = TRUE, return_index = FALSE ) } \arguments{ \item{x}{Character vector or object with \code{names()} method.} \item{pattern}{Character vector: pattern(s) to match anywhere in names of x.} \item{starts_with}{Character: pattern to match in the beginning of names of x.} \item{ends_with}{Character: pattern to match at the end of names of x.} \item{ignore_case}{Logical: If TRUE, well, ignore case.} \item{return_index}{Logical: If TRUE, return integer index of matches instead of names.} } \value{ Character vector of matched names or integer index. } \description{ Get names by string matching multiple patterns } \details{ \code{pattern}, \code{starts_with}, and \code{ends_with} are applied and the union of all matches is returned. \code{pattern} can be a character vector of multiple patterns to match. } \examples{ mgetnames(iris, pattern = c("Sepal", "Petal")) mgetnames(iris, starts_with = "Sepal") mgetnames(iris, ends_with = "Width") } \author{ EDG } ================================================ FILE: man/names_by_class.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.R \name{names_by_class} \alias{names_by_class} \title{List column names by class} \usage{ names_by_class(x, sorted = TRUE, item_format = highlight, maxlength = 24) } \arguments{ \item{x}{tabular data.} \item{sorted}{Logical: If TRUE, sort the output} \item{item_format}{Function: Function to format each item} \item{maxlength}{Integer: Maximum number of items to print} } \value{ \code{NULL}, invisibly. } \description{ List column names by class } \examples{ names_by_class(iris) } \author{ EDG } ================================================ FILE: man/one_hot2factor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/preprocess.R \name{one_hot2factor} \alias{one_hot2factor} \title{Convert one-hot encoded matrix to factor} \usage{ one_hot2factor(x, labels = colnames(x)) } \arguments{ \item{x}{one-hot encoded matrix or data.frame.} \item{labels}{Character vector of level names.} } \value{ A factor. } \description{ Convert one-hot encoded matrix to factor } \details{ If input has a single column, it will be converted to factor and returned } \examples{ x <- data.frame(matrix(FALSE, 10, 3)) colnames(x) <- c("Dx1", "Dx2", "Dx3") x$Dx1[1:3] <- x$Dx2[4:6] <- x$Dx3[7:10] <- TRUE one_hot2factor(x) } \author{ EDG } ================================================ FILE: man/outcome.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{outcome} \alias{outcome} \title{Get the outcome as a vector} \usage{ outcome(x) } \arguments{ \item{x}{tabular data.} } \value{ Vector containing the last column of \code{x}. } \description{ Returns the last column of \code{x}, which is by convention the outcome variable. } \details{ This applied to tabular datasets used for supervised learning in rtemis, where, by convention, the last column is the outcome variable and all other columns are features. } \examples{ outcome(iris) } \author{ EDG } ================================================ FILE: man/outcome_name.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{outcome_name} \alias{outcome_name} \title{Get the name of the last column} \usage{ outcome_name(x) } \arguments{ \item{x}{tabular data.} } \value{ Name of the last column. } \description{ Get the name of the last column } \details{ This applied to tabular datasets used for supervised learning in rtemis, where, by convention, the last column is the outcome variable and all other columns are features. } \examples{ outcome_name(iris) } \author{ EDG } ================================================ FILE: man/plot.MassGLM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/08_MassUni.R \name{plot.MassGLM} \alias{plot.MassGLM} \title{Plot MassGLM using volcano plot} \usage{ \method{plot}{MassGLM}( x, coefname = NULL, p_adjust_method = "holm", p_transform = function(x) -log10(x), xlab = "Coefficient", ylab = NULL, theme = choose_theme(getOption("rtemis_theme")), verbosity = 1L, ... ) } \arguments{ \item{x}{MassGLM object trained using \link{massGLM}.} \item{coefname}{Character: Name of coefficient to plot. If \code{NULL}, the first coefficient is used.} \item{p_adjust_method}{Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" - p-value adjustment method.} \item{p_transform}{Function to transform p-values for plotting. Default is \code{function(x) -log10(x)}.} \item{xlab}{Character: x-axis label.} \item{ylab}{Character: y-axis label.} \item{theme}{\code{Theme} object. Create using one of the \code{theme_} functions, e.g. \code{theme_whitegrid()}.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Additional arguments passed to \link{draw_volcano}.} } \value{ plotly object with volcano plot. } \description{ Plot MassGLM using volcano plot } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} set.seed(2019) y <- rnormmat(500, 500, return_df = TRUE) x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500)) mod <- massGLM(x, y) plot(mod) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/plot_manhattan.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R, R/08_MassUni.R \name{plot_manhattan} \alias{plot_manhattan} \alias{plot_manhattan.MassGLM} \title{Manhattan plot} \usage{ plot_manhattan(x, ...) plot_manhattan.MassGLM( x, coefname = NULL, p_adjust_method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"), p_transform = function(x) -log10(x), ylab = NULL, theme = choose_theme(getOption("rtemis_theme")), col_pos = "#43A4AC", col_neg = "#FA9860", alpha = 0.8, ... ) } \arguments{ \item{x}{MassGLM object.} \item{...}{Additional arguments passed to \link{draw_bar}.} \item{coefname}{Character: Name of coefficient to plot. If \code{NULL}, the first coefficient is used.} \item{p_adjust_method}{Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" - p-value adjustment method.} \item{p_transform}{Function to transform p-values for plotting. Default is \code{function(x) -log10(x)}.} \item{ylab}{Character: y-axis label.} \item{theme}{\code{Theme} object.} \item{col_pos}{Character: Color for positive significant coefficients.} \item{col_neg}{Character: Color for negative significant coefficients.} \item{alpha}{Numeric: Transparency level for the bars.} } \value{ plotly object. } \description{ Draw a Manhattan plot for \code{MassGLM} objects created with \link{massGLM}. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} # x: outcome of interest as first column, optional covariates in the other columns # y: features whose association with x we want to study set.seed(2022) y <- data.table(rnormmat(500, 40)) x <- data.table( x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500), x2 = y[[21]] + rnorm(500) ) massmod <- massGLM(x, y) plot_manhattan(massmod) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/plot_roc.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{plot_roc} \alias{plot_roc} \title{Plot ROC curve} \usage{ plot_roc(x, ...) } \arguments{ \item{x}{\code{Classification} or \code{ClassificationRes} object.} \item{...}{Additional arguments passed to the plotting function.} } \value{ A plotly object containing the ROC curve. } \description{ This generic is used to plot the ROC curve for a model. } \examples{ ir <- iris[51:150, ] ir[["Species"]] <- factor(ir[["Species"]]) species_glm <- train(ir, algorithm = "GLM") plot_roc(species_glm) } \author{ EDG } ================================================ FILE: man/plot_true_pred.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{plot_true_pred} \alias{plot_true_pred} \title{Plot True vs. Predicted Values} \usage{ plot_true_pred(x, ...) } \arguments{ \item{x}{\code{Supervised} or \code{SupervisedRes} object.} \item{...}{Additional arguments passed to methods.} } \value{ plotly object. } \description{ Plot True vs. Predicted Values for Supervised objects. For classification, it plots a confusion matrix. For regression, it plots a scatter plot of true vs. predicted values. } \examples{ x <- set_outcome(iris, "Sepal.Length") sepallength_glm <- train(x, algorithm = "GLM") plot_true_pred(sepallength_glm) } \author{ EDG } ================================================ FILE: man/plot_varimp.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{plot_varimp} \alias{plot_varimp} \title{Plot Variable Importance} \usage{ plot_varimp(x, ...) } \arguments{ \item{x}{\code{Supervised} or \code{SupervisedRes} object.} \item{...}{Additional arguments passed to methods.} } \value{ plotly object or invisible NULL if no variable importance is available. } \description{ Plot Variable Importance for Supervised objects. } \details{ This method calls \link{draw_varimp} internally. If you pass an integer to the \code{plot_top} argument, the method will plot this many top features. If you pass a number between 0 and 1 to the \code{plot_top} argument, the method will plot this fraction of top features. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} ir <- set_outcome(iris, "Sepal.Length") seplen_cart <- train(ir, algorithm = "CART") plot_varimp(seplen_cart) # Plot horizontally plot_varimp(seplen_cart, orientation = "h") plot_varimp(seplen_cart, orientation = "h", plot_top = 3L) plot_varimp(seplen_cart, orientation = "h", plot_top = 0.5) \dontshow{\}) # examplesIf} } \seealso{ \link{draw_varimp}, which is called by this method } \author{ EDG } ================================================ FILE: man/preprocess.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R, R/preprocess.R \name{preprocess} \alias{preprocess} \alias{preprocess.class_tabular.PreprocessorConfig} \alias{preprocess.class_tabular.Preprocessor} \title{Preprocess Data} \usage{ preprocess(x, config, ...) preprocess.class_tabular.PreprocessorConfig( x, config, dat_validation = NULL, dat_test = NULL, verbosity = 1L ) preprocess.class_tabular.Preprocessor(x, config, verbosity = 1L) } \arguments{ \item{x}{data.frame, data.table, tbl_df (tabular data): Data to be preprocessed.} \item{config}{\code{PreprocessorConfig}: Setup using \link{setup_Preprocessor} OR \code{Preprocessor} object: Output of previous run of \code{preprocess}. This allows, for example, applying preprocessing to a validation or test set using the same parameters as were used for the training set. In particular, the same scale centers and coefficients will be applied to the new data.} \item{...}{Not used.} \item{dat_validation}{tabular data: Validation set data.} \item{dat_test}{tabular data: Test set data.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{Preprocessor} object. } \description{ Preprocess data for analysis and visualization. } \details{ Methods are provided for preprocessing training set data, which accepts a \code{PreprocessorConfig} object, and for preprocessing validation and test set data, which accept a \code{Preprocessor} object. } \examples{ # Setup a `Preprocessor`: this outputs a `PreprocessorConfig` object. prp <- setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE) # Includes a long list of parameters prp # Resample iris to get train and test data res <- resample(iris, setup_Resampler(seed = 2026)) iris_train <- iris[res[[1]], ] iris_test <- iris[-res[[1]], ] # Preprocess training data iris_pre <- preprocess(iris_train, prp) # Access preprocessd training data with `preprocessed()` preprocessed(iris_pre) # Apply the same preprocessing to test data # In this case, the scale and center values from training data will be used. # Note how `preprocess()` accepts either a `PreprocessorConfig` or `Preprocessor` object for # this reason. iris_test_pre <- preprocess(iris_test, iris_pre) # Access preprocessed test data preprocessed(iris_test_pre) } \author{ EDG } ================================================ FILE: man/preprocessed.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{preprocessed} \alias{preprocessed} \title{Get preprocessed data from \code{Preprocessor}.} \usage{ preprocessed(x) } \arguments{ \item{x}{\code{Preprocessor}: A \code{Preprocessor} object.} } \value{ data.frame: The preprocessed data. } \description{ Returns the preprocessed data from a \code{Preprocessor} object. } \examples{ prp <- preprocess(iris, setup_Preprocessor(scale = TRUE, center = TRUE)) preprocessed(prp) } ================================================ FILE: man/present.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{present} \alias{present} \title{Present rtemis object} \usage{ present(x, ...) } \arguments{ \item{x}{\code{Supervised} or \code{SupervisedRes} object or list of such objects.} \item{...}{Additional arguments passed to the plotting function.} } \value{ A plotly object. } \description{ This generic is used to present an rtemis object by printing to console and drawing plots. } \examples{ \dontshow{if (interactive()) withAutoprint(\{ # examplesIf} ir <- set_outcome(iris, "Sepal.Length") seplen_lightrf <- train(ir, algorithm = "lightrf") present(seplen_lightrf) \dontshow{\}) # examplesIf} } \author{ EDG } ================================================ FILE: man/previewcolor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_color.R \name{previewcolor} \alias{previewcolor} \title{Preview color} \usage{ previewcolor( x, main = NULL, bg = "#333333", main_col = "#b3b3b3", main_x = 0.7, main_y = 0.2, main_adj = 0, main_cex = 0.9, main_font = 2, width = NULL, xlim = NULL, ylim = c(0, 2.2), asp = 1, labels_y = 1.55, label_cex = NULL, mar = c(0, 0, 0, 1), filename = NULL, pdf_width = 8, pdf_height = 2.5 ) } \arguments{ \item{x}{Color, vector: One or more colors that R understands} \item{main}{Character: Title. Default = NULL, which results in \code{deparse(substitute(x))}} \item{bg}{Background color.} \item{main_col}{Color: Title color} \item{main_x}{Float: x coordinate for \code{main}.} \item{main_y}{Float: y coordinate for \code{main}.} \item{main_adj}{Float: \code{adj} argument to mtext for \code{main}.} \item{main_cex}{Float: character expansion factor for \code{main}.} \item{main_font}{Integer, 1 or 2: Weight of \code{main} 1: regular, 2: bold.} \item{width}{Float: Plot width. Default = NULL, i.e. set automatically} \item{xlim}{Vector, length 2: x-axis limits. Default = NULL, i.e. set automatically} \item{ylim}{Vector, length 2: y-axis limits.} \item{asp}{Float: Plot aspect ratio.} \item{labels_y}{Float: y coord for labels. Default = 1.55 (rhombi are fixed and range y .5 - 1.5)} \item{label_cex}{Float: Character expansion for labels. Default = NULL, and is calculated automatically based on length of \code{x}} \item{mar}{Numeric vector, length 4: margin size.} \item{filename}{Character: Path to save plot as PDF.} \item{pdf_width}{Numeric: Width of PDF in inches.} \item{pdf_height}{Numeric: Height of PDF in inches.} } \value{ Nothing, prints plot. } \description{ Preview one or multiple colors using little rhombi with their little labels up top } \examples{ previewcolor(get_palette("rtms")) } \author{ EDG } ================================================ FILE: man/read.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/read.R \name{read} \alias{read} \title{Read tabular data from a variety of formats} \usage{ read( filename, datadir = NULL, make_unique = FALSE, character2factor = FALSE, clean_colnames = TRUE, delim_reader = c("data.table", "vroom", "duckdb", "arrow"), xlsx_sheet = 1, sep = NULL, quote = "\\"", na_strings = c(""), output = c("data.table", "tibble", "data.frame"), attr = NULL, value = NULL, verbosity = 1L, fread_verbosity = 0L, timed = verbosity > 0L, ... ) } \arguments{ \item{filename}{Character: filename or full path if \code{datadir = NULL}.} \item{datadir}{Character: Optional path to directory where \code{filename} is located. If not specified, \code{filename} must be the full path.} \item{make_unique}{Logical: If TRUE, keep unique rows only.} \item{character2factor}{Logical: If TRUE, convert character variables to factors.} \item{clean_colnames}{Logical: If TRUE, clean columns names using \link{clean_colnames}.} \item{delim_reader}{Character: package to use for reading delimited data.} \item{xlsx_sheet}{Integer or character: Name or number of XLSX sheet to read.} \item{sep}{Single character: field separator. If \code{delim_reader = "fread"} and \code{sep = NULL}, this defaults to "auto", otherwise defaults to ",".} \item{quote}{Single character: quote character.} \item{na_strings}{Character vector: Strings to be interpreted as NA values. For \code{delim_reader = "duckdb"}, this must be a single string.} \item{output}{Character: "default" or "data.table", If default, return the delim_reader's default data structure, otherwise convert to data.table.} \item{attr}{Character: Attribute to set (Optional).} \item{value}{Character: Value to set (if \code{attr} is not NULL).} \item{verbosity}{Integer: Verbosity level.} \item{fread_verbosity}{Integer: Verbosity level. Passed to \code{data.table::fread}} \item{timed}{Logical: If TRUE, time the process and print to console} \item{...}{Additional arguments to pass to \code{data.table::fread}, \code{arrow::read_delim_arrow()}, \code{vroom::vroom()}, or \code{readxl::read_excel()}.} } \value{ data.frame, data.table, or tibble. } \description{ Read data and optionally clean column names, keep unique rows, and convert characters to factors } \details{ \code{read} is a convenience function to read: \itemize{ \item \strong{Delimited} files using \code{data.table:fread()}, \code{arrow:read_delim_arrow()}, \code{vroom::vroom()}, or \code{duckdb::duckdb_read_csv()} \item \strong{ARFF} files using \code{farff::readARFF()} \item \strong{Parquet} files using \code{arrow::read_parquet()} \item \strong{XLSX} files using \code{readxl::read_excel()} \item \strong{DTA} files from Stata using \code{haven::read_dta()} \item \strong{FASTA} files using \code{seqinr::read.fasta()} \item \strong{RDS} files using \code{readRDS()} } } \examples{ \dontrun{ # Replace with your own data directory and filename datadir <- "/Data" dat <- read("iris.csv", datadir) } } \author{ EDG } ================================================ FILE: man/read_config.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/14_SuperConfig.R \name{read_config} \alias{read_config} \title{Read \code{SuperConfig} from TOML file} \usage{ read_config(file) } \arguments{ \item{file}{Character: Path to input TOML file.} } \value{ \code{SuperConfig} object. } \description{ Read \code{SuperConfig} object from TOML file that was written with \code{write_toml()}. } \examples{ # Create a SuperConfig object x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", algorithm = "LightRF", hyperparameters = setup_LightRF() ) # Write TOML file tmpdir <- tempdir() tmpfile <- file.path(tmpdir, "rtemis_test.toml") write_toml(x, tmpfile) # Read config from TOML file x_read <- read_config(tmpfile) } \author{ EDG } ================================================ FILE: man/regression_metrics.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/metrics.R \name{regression_metrics} \alias{regression_metrics} \title{Regression Metrics} \usage{ regression_metrics(true, predicted, na.rm = TRUE, sample = NULL) } \arguments{ \item{true}{Numeric vector: True values.} \item{predicted}{Numeric vector: Predicted values.} \item{na.rm}{Logical: If TRUE, remove NA values before computation.} \item{sample}{Character: Sample name (e.g. "training", "test").} } \value{ \code{RegressionMetrics} object. } \description{ Regression Metrics } \examples{ true <- rnorm(100) predicted <- true + rnorm(100, sd = 0.5) regression_metrics(true, predicted) } \author{ EDG } ================================================ FILE: man/resample.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/resample.R \name{resample} \alias{resample} \title{Resample data} \usage{ resample(x, config = setup_Resampler(), verbosity = 1L) } \arguments{ \item{x}{Vector or data.frame: Usually the outcome; \code{NROW(x)} defines the sample size.} \item{config}{Resampler object created by \link{setup_Resampler}.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{Resampler} object. } \description{ Create resamples of your data, e.g. for model building or validation. "KFold" creates stratified folds, , "StratSub" creates stratified subsamples, "Bootstrap" gives the standard bootstrap, i.e. random sampling with replacement, while "StratBoot" uses StratSub and then randomly duplicates some of the training cases to reach original length of input (default) or length defined by \code{target_length}. } \details{ Note that option 'KFold' may result in resamples of slightly different length. Avoid all operations which rely on equal-length vectors. For example, you can't place resamples in a data.frame, but must use a list instead. } \examples{ y <- rnorm(200) # 10-fold (stratified) y_10fold <- resample(y, setup_Resampler(10L, "kfold")) y_10fold # 25 stratified subsamples y_25strat <- resample(y, setup_Resampler(25L, "stratsub")) y_25strat # 100 stratified bootstraps y_100strat <- resample(y, setup_Resampler(100L, "stratboot")) y_100strat # LOOCV y_loocv <- resample(y, setup_Resampler(type = "LOOCV")) y_loocv } \author{ EDG } ================================================ FILE: man/rnormmat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{rnormmat} \alias{rnormmat} \title{Random Normal Matrix} \usage{ rnormmat( nrow = 10, ncol = 10, mean = 0, sd = 1, return_df = FALSE, seed = NULL ) } \arguments{ \item{nrow}{Integer: Number of rows.} \item{ncol}{Integer: Number of columns.} \item{mean}{Float: Mean.} \item{sd}{Float: Standard deviation.} \item{return_df}{Logical: If TRUE, return data.frame, otherwise matrix.} \item{seed}{Integer: Set seed for \code{rnorm}.} } \value{ \code{matrix} or \code{data.frame}. } \description{ Create a matrix or data frame of defined dimensions, whose columns are random normal vectors } \examples{ x <- rnormmat(20, 5, mean = 12, sd = 6, return_df = TRUE, seed = 2026) x } \author{ EDG } ================================================ FILE: man/rtemis-package.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/rtemis-package.R \docType{package} \name{rtemis-package} \alias{rtemis} \alias{rtemis-package} \title{\pkg{rtemis}: Advanced Machine Learning and Visualization} \description{ Advanced Machine Learning & Visualization made efficient, accessible, reproducible } \section{Online Documentation and Vignettes}{ \url{https://docs.rtemis.org/r/ml} } \section{System Setup}{ There are some options you can define in your .Rprofile (usually found in your home directory), so you do not have to define each time you execute a function. \describe{ \item{rtemis_theme}{General plotting theme; set to e.g. "whiteigrid" or "darkgraygrid"} \item{rtemis_font}{Font family to use in plots.} \item{rtemis_palette}{Name of default palette to use in plots. See options by running \code{get_palette()}} } } \section{Visualization}{ Graphics are handled using the \code{draw} family, which produces interactive plots primarily using \code{plotly} and other packages. } \section{Supervised Learning}{ By convention, the last column of the data is the outcome variable, and all other columns are predictors. Convenience function \link{set_outcome} can be used to move a specified column to the end of the data. Regression and Classification is performed using \code{train()}. This function allows you to preprocess, train, tune, and test models on multiple resamples. Use \link{available_supervised} to get a list of available algorithms } \section{Classification}{ For training of binary classification models, the outcome should be provided as a factor, with the \emph{second} level of the factor being the 'positive' class. } \section{Clustering}{ Clustering is performed using \code{cluster()}. Use \link{available_clustering} to get a list of available algorithms. } \section{Decomposition}{ Decomposition is performed using \code{decomp()}. Use \link{available_decomposition} to get a list of available algorithms. } \section{Type Documentation}{ Function documentation includes input type (e.g. "Character", "Integer", "Float"/"Numeric", etc). When applicable, value ranges are provided in interval notation. For example, Float: [0, 1) means floats between 0 and 1 including 0, but excluding 1. Categorical variables may include set of allowed values using curly braces. For example, Character: \{"future", "mirai", "none"\}. } \section{Tabular Data}{ \pkg{rtemis} internally uses methods for efficient handling of tabular data, with support for \code{data.frame}, \code{data.table}, and \code{tibble}. If a function is documented as accepting "tabular data", it should work with any of these data structures. If a function is documented as accepting only one of these, then it should only be used with that structure. For example, some optimized \code{data.table} operations that perform in-place modifications only work with \code{data.table} objects. } \seealso{ Useful links: \itemize{ \item \url{https://www.rtemis.org} \item \url{https://docs.rtemis.org/r/ml} \item \url{https://docs.rtemis.org/r/ml-api/} \item Report bugs at \url{https://github.com/rtemis-org/rtemis/issues} } } \author{ \strong{Maintainer}: E.D. Gennatas \email{gennatas@gmail.com} (\href{https://orcid.org/0000-0001-9280-3609}{ORCID}) [copyright holder] Authors: \itemize{ \item E.D. Gennatas \email{gennatas@gmail.com} (\href{https://orcid.org/0000-0001-9280-3609}{ORCID}) [copyright holder] } } ================================================ FILE: man/rtemis_colors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/rtemis_color_system.R \name{rtemis_colors} \alias{rtemis_colors} \title{rtemis Color System} \format{ A named list with the following elements: \describe{ \item{red}{"kaimana red"} \item{blue}{"kaimana light blue"} \item{green}{"kaimana medium green"} \item{orange}{"coastside orange"} \item{teal}{"rtemis teal"} \item{purple}{"rtemis purple"} \item{magenta}{"rtemis magenta"} \item{highlight_col}{"highlight color"} \item{object}{"rtemis teal"} \item{info}{"lmd burgundy"} \item{outer}{"kaimana red"} \item{tuner}{"coastside orange"} } } \usage{ rtemis_colors } \description{ A named list of colors used consistently across all packages in the rtemis ecosystem. } \details{ Colors are provided as hex strings. } \examples{ rtemis_colors[["orange"]] rtemis_colors[["teal"]] } \author{ EDG } ================================================ FILE: man/rtversion.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{rtversion} \alias{rtversion} \title{Get rtemis version and system info} \usage{ rtversion() } \value{ List: rtemis version and system info, invisibly. } \description{ Get rtemis version and system info } \examples{ rtversion() } \author{ EDG } ================================================ FILE: man/runifmat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{runifmat} \alias{runifmat} \title{Random Uniform Matrix} \usage{ runifmat( nrow = 10, ncol = 10, min = 0, max = 1, return_df = FALSE, seed = NULL ) } \arguments{ \item{nrow}{Integer: Number of rows.} \item{ncol}{Integer: Number of columns.} \item{min}{Float: Min.} \item{max}{Float: Max.} \item{return_df}{Logical: If TRUE, return data.frame, otherwise matrix.} \item{seed}{Integer: Set seed for \code{rnorm}.} } \value{ \code{matrix} or \code{data.frame}. } \description{ Create a matrix or data frame of defined dimensions, whose columns are random uniform vectors } \examples{ x <- runifmat(20, 5, min = 12, max = 18, return_df = TRUE, seed = 2026) x } \author{ EDG } ================================================ FILE: man/set_msg_sink.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/msg.R \name{set_msg_sink} \alias{set_msg_sink} \title{Set the rtemis message sink} \usage{ set_msg_sink(sink) } \arguments{ \item{sink}{Function or \code{NULL}.} } \value{ Previous sink (function or \code{NULL}), invisibly. } \description{ When set, \code{msg()}, \code{msg0()}, \code{msgstart()}, and \code{msgdone()} forward their structured output through \code{sink} instead of writing to the R console. Used by \code{rtemislive} to capture training-time messages and forward them over a WebSocket connection. Pass \code{NULL} to restore default console output. } \details{ The sink function is called once per message with a single argument: a list with fields \itemize{ \item \code{text}: character. The formatted message body (no datetime prefix). \item \code{caller}: character or \code{NA}. Calling function as identified by \code{format_caller()}. \item \code{ts}: character. Formatted timestamp (\code{"\%Y-\%m-\%d \%H:\%M:\%S"}). \item \code{level}: character. One of \code{"info"} (\code{msg}/\code{msg0}), \code{"start"} (\code{msgstart}), or \code{"done"} (\code{msgdone}). } When a sink is set, the console output path is \strong{skipped} for affected calls. Errors thrown by the sink propagate to the caller of \code{msg()}. } \examples{ captured <- list() set_msg_sink(function(m) captured[[length(captured) + 1L]] <<- m) # msg("hello world") # would append to `captured` set_msg_sink(NULL) # restore console output } \seealso{ \code{\link[=get_msg_sink]{get_msg_sink()}}, \code{\link[=with_msg_sink]{with_msg_sink()}}. } \author{ EDG } ================================================ FILE: man/set_outcome.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_supervised.R \name{set_outcome} \alias{set_outcome} \title{Move outcome to last column} \usage{ set_outcome(dat, outcome_column) } \arguments{ \item{dat}{data.frame or similar.} \item{outcome_column}{Character: Name of outcome column.} } \value{ object of same class as \code{data} } \description{ Move outcome to last column } \examples{ ir <- set_outcome(iris, "Sepal.Length") head(ir) } \author{ EDG } ================================================ FILE: man/setdiffsym.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{setdiffsym} \alias{setdiffsym} \title{Symmetric Set Difference} \usage{ setdiffsym(x, y) } \arguments{ \item{x}{vector} \item{y}{vector of same type as \code{x}} } \value{ Vector. } \description{ Symmetric Set Difference } \examples{ setdiff(1:10, 1:5) setdiff(1:5, 1:10) setdiffsym(1:10, 1:5) setdiffsym(1:5, 1:10) } \author{ EDG } ================================================ FILE: man/setup_CART.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_CART} \alias{setup_CART} \title{Setup CART Hyperparameters} \usage{ setup_CART( cp = 0.01, maxdepth = 20L, minsplit = 2L, minbucket = 1L, prune_cp = NULL, method = "auto", model = TRUE, maxcompete = 4L, maxsurrogate = 5L, usesurrogate = 2L, surrogatestyle = 0L, xval = 0L, cost = NULL, ifw = FALSE ) } \arguments{ \item{cp}{(Tunable) Numeric: Complexity parameter.} \item{maxdepth}{(Tunable) Integer: Maximum depth of tree.} \item{minsplit}{(Tunable) Integer: Minimum number of observations in a node to split.} \item{minbucket}{(Tunable) Integer: Minimum number of observations in a terminal node.} \item{prune_cp}{(Tunable) Numeric: Complexity for cost-complexity pruning after tree is built} \item{method}{String: Splitting method.} \item{model}{Logical: If TRUE, return a model.} \item{maxcompete}{Integer: Maximum number of competitive splits.} \item{maxsurrogate}{Integer: Maximum number of surrogate splits.} \item{usesurrogate}{Integer: Number of surrogate splits to use.} \item{surrogatestyle}{Integer: Type of surrogate splits.} \item{xval}{Integer: Number of cross-validation folds.} \item{cost}{Numeric (>=0): One for each feature.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ CARTHyperparameters object. } \description{ Setup hyperparameters for CART training. } \details{ Get more information from \link[rpart:rpart]{rpart::rpart} and \link[rpart:rpart.control]{rpart::rpart.control}. } \examples{ cart_hyperparams <- setup_CART(cp = 0.01, maxdepth = 10L, ifw = TRUE) cart_hyperparams } \author{ EDG } ================================================ FILE: man/setup_CMeans.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/09_ClusteringConfig.R \name{setup_CMeans} \alias{setup_CMeans} \title{Setup CMeansConfig} \usage{ setup_CMeans( k = 2L, max_iter = 100L, dist = c("euclidean", "manhattan"), method = c("cmeans", "ufcl"), m = 2, rate_par = NULL, weights = 1, control = list() ) } \arguments{ \item{k}{Integer: Number of clusters.} \item{max_iter}{Integer: Maximum number of iterations.} \item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.} \item{method}{Character: "cmeans" - fuzzy c-means clustering; "ufcl": on-line update.} \item{m}{Float (>1): Degree of fuzzification.} \item{rate_par}{Float (0, 1): Learning rate for the online variant.} \item{weights}{Float (>0): Case weights.} \item{control}{List: Control config for clustering algorithm.} } \value{ CMeansConfig object. } \description{ Setup CMeansConfig } \examples{ cmeans_config <- setup_CMeans(k = 4L, dist = "euclidean") cmeans_config } \author{ EDG } ================================================ FILE: man/setup_DBSCAN.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/09_ClusteringConfig.R \name{setup_DBSCAN} \alias{setup_DBSCAN} \title{Setup DBSCANConfig} \usage{ setup_DBSCAN( eps = 0.5, min_points = 5L, weights = NULL, border_points = TRUE, search = c("kdtree", "linear", "dist"), bucket_size = 100L, split_rule = c("SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR"), approx = FALSE ) } \arguments{ \item{eps}{Float: Radius of neighborhood.} \item{min_points}{Integer: Minimum number of points in a neighborhood to form a cluster.} \item{weights}{Numeric vector: Weights for data points.} \item{border_points}{Logical: If TRUE, assign border points to clusters.} \item{search}{Character: Nearest neighbor search strategy: "kdtree", "linear", or "dist".} \item{bucket_size}{Integer: Size of buckets for k-dtree search.} \item{split_rule}{Character: Rule for splitting clusters: "SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR".} \item{approx}{Logical: If TRUE, use approximate nearest neighbor search.} } \value{ DBSCANConfig object. } \description{ Setup DBSCANConfig } \examples{ dbscan_config <- setup_DBSCAN(eps = 0.5, min_points = 5L) dbscan_config } \author{ EDG } ================================================ FILE: man/setup_ExecutionConfig.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/01_ExecutionConfig.R \name{setup_ExecutionConfig} \alias{setup_ExecutionConfig} \title{Setup Execution Configuration} \usage{ setup_ExecutionConfig( backend = c("future", "mirai", "none"), n_workers = NULL, future_plan = NULL ) } \arguments{ \item{backend}{Character: Execution backend: "future", "mirai", or "none".} \item{n_workers}{Integer: Number of workers for parallel execution. Only used if \verb{backend is "future"} or "mirai". Do not rely on the default value, set to an appropriate number depending on your system.} \item{future_plan}{Character: Future plan to use if \code{backend} is "future".} } \value{ \code{ExecutionConfig} object. } \description{ Setup Execution Configuration } \examples{ setup_ExecutionConfig(backend = "future", n_workers = 4L, future_plan = "multisession") } \author{ EDG } ================================================ FILE: man/setup_GAM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_GAM} \alias{setup_GAM} \title{Setup GAM Hyperparameters} \usage{ setup_GAM(k = 5L, ifw = FALSE) } \arguments{ \item{k}{(Tunable) Integer: Number of knots.} \item{ifw}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ GAMHyperparameters object. } \description{ Setup hyperparameters for GAM training. } \details{ Get more information from \link[mgcv:gam]{mgcv::gam}. } \examples{ gam_hyperparams <- setup_GAM(k = 5L, ifw = FALSE) gam_hyperparams } \author{ EDG } ================================================ FILE: man/setup_GLM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_GLM} \alias{setup_GLM} \title{Setup GLM Hyperparameters} \usage{ setup_GLM(ifw = FALSE) } \arguments{ \item{ifw}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ GLMHyperparameters object. } \description{ Setup hyperparameters for GLM training. } \examples{ glm_hyperparams <- setup_GLM(ifw = TRUE) glm_hyperparams } \author{ EDG } ================================================ FILE: man/setup_GLMNET.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_GLMNET} \alias{setup_GLMNET} \title{Setup GLMNET Hyperparameters} \usage{ setup_GLMNET( alpha = 1, family = NULL, offset = NULL, which_lambda_cv = "lambda.1se", nlambda = 100L, lambda = NULL, penalty_factor = NULL, standardize = TRUE, intercept = TRUE, ifw = TRUE ) } \arguments{ \item{alpha}{(Tunable) Numeric: Mixing parameter.} \item{family}{Character: Family for GLMNET.} \item{offset}{Numeric: Offset for GLMNET.} \item{which_lambda_cv}{Character: Which lambda to use for prediction: "lambda.1se" or "lambda.min"} \item{nlambda}{Positive integer: Number of lambda values.} \item{lambda}{Numeric: Lambda values.} \item{penalty_factor}{Numeric: Penalty factor for each feature.} \item{standardize}{Logical: If TRUE, standardize features.} \item{intercept}{Logical: If TRUE, include intercept.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ GLMNETHyperparameters object. } \description{ Setup hyperparameters for GLMNET training. } \details{ Get more information from \link[glmnet:glmnet]{glmnet::glmnet}. } \examples{ glm_hyperparams <- setup_GLMNET(alpha = 1, ifw = TRUE) glm_hyperparams } \author{ EDG } ================================================ FILE: man/setup_GridSearch.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/06_Tuner.R \name{setup_GridSearch} \alias{setup_GridSearch} \title{Setup Grid Search Config} \usage{ setup_GridSearch( resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"), search_type = "exhaustive", randomize_p = NULL, metrics_aggregate_fn = "mean", metric = NULL, maximize = NULL ) } \arguments{ \item{resampler_config}{\code{ResamplerConfig} set by \link{setup_Resampler}.} \item{search_type}{Character: "exhaustive" or "randomized". Type of grid search to use. Exhaustive search will try all combinations of config. Randomized will try a random sample of size \code{randomize_p} * \verb{N of total combinations}} \item{randomize_p}{Float (0, 1): For \code{search_type == "randomized"}, randomly test this proportion of combinations.} \item{metrics_aggregate_fn}{Character: Name of function to use to aggregate error metrics.} \item{metric}{Character: Metric to minimize or maximize.} \item{maximize}{Logical: If TRUE, maximize \code{metric}, otherwise minimize it.} } \value{ A \code{GridSearchConfig} object. } \description{ Create a \code{GridSearchConfig} object that can be passed to \link{train}. } \examples{ gridsearch_config <- setup_GridSearch( resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"), search_type = "exhaustive" ) gridsearch_config } \author{ EDG } ================================================ FILE: man/setup_HardCL.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/09_ClusteringConfig.R \name{setup_HardCL} \alias{setup_HardCL} \title{Setup HardCLConfig} \usage{ setup_HardCL(k = 3L, dist = c("euclidean", "manhattan")) } \arguments{ \item{k}{Number of clusters.} \item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.} } \value{ HardCLConfig object. } \description{ Setup HardCLConfig } \examples{ hardcl_config <- setup_HardCL(k = 4L, dist = "euclidean") hardcl_config } \author{ EDG } ================================================ FILE: man/setup_ICA.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_ICA} \alias{setup_ICA} \title{setup_ICA} \usage{ setup_ICA( k = 3L, type = c("parallel", "deflation"), fun = c("logcosh", "exp"), alpha = 1, row_norm = TRUE, maxit = 100L, tol = 1e-04 ) } \arguments{ \item{k}{Integer: Number of components.} \item{type}{Character: Type of ICA: "parallel" or "deflation".} \item{fun}{Character: ICA function: "logcosh", "exp".} \item{alpha}{Numeric [1, 2]: Used in approximation to neg-entropy with \code{fun = "logcosh"}.} \item{row_norm}{Logical: If TRUE, normalize rows of \code{x} before ICA.} \item{maxit}{Integer: Maximum number of iterations.} \item{tol}{Numeric: Tolerance.} } \value{ ICAConfig object. } \description{ Setup ICA config. } \examples{ ica_config <- setup_ICA(k = 3L) ica_config } \author{ EDG } ================================================ FILE: man/setup_Isomap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_Isomap} \alias{setup_Isomap} \title{Setup Isomap config.} \usage{ setup_Isomap( k = 2L, dist_method = c("euclidean", "manhattan"), nsd = 0L, path = c("shortest", "extended") ) } \arguments{ \item{k}{Integer: Number of components.} \item{dist_method}{Character: Distance method.} \item{nsd}{Integer: Number of shortest dissimilarities retained.} \item{path}{Character: Path argument for \code{vegan::isomap}.} } \value{ IsomapConfig object. } \description{ Setup Isomap config. } \examples{ isomap_config <- setup_Isomap(k = 3L) isomap_config } \author{ EDG } ================================================ FILE: man/setup_Isotonic.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_Isotonic} \alias{setup_Isotonic} \title{Setup Isotonic Hyperparameters} \usage{ setup_Isotonic(ifw = FALSE) } \arguments{ \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ IsotonicHyperparameters object. } \description{ Setup hyperparameters for Isotonic Regression. } \details{ There are not hyperparameters for this algorithm at this moment. } \examples{ isotonic_hyperparams <- setup_Isotonic(ifw = TRUE) isotonic_hyperparams } \author{ EDG } ================================================ FILE: man/setup_KMeans.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/09_ClusteringConfig.R \name{setup_KMeans} \alias{setup_KMeans} \title{Setup KMeansConfig} \usage{ setup_KMeans(k = 3L, dist = c("euclidean", "manhattan")) } \arguments{ \item{k}{Number of clusters.} \item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.} } \value{ KMeansConfig object. } \description{ Setup KMeansConfig } \examples{ kmeans_config <- setup_KMeans(k = 4L, dist = "euclidean") kmeans_config } \author{ EDG } ================================================ FILE: man/setup_LightCART.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_LightCART} \alias{setup_LightCART} \title{Setup LightCART Hyperparameters} \usage{ setup_LightCART( num_leaves = 32L, max_depth = -1L, lambda_l1 = 0, lambda_l2 = 0, min_data_in_leaf = 20L, max_cat_threshold = 32L, min_data_per_group = 100L, linear_tree = FALSE, objective = NULL, ifw = FALSE ) } \arguments{ \item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.} \item{max_depth}{(Tunable) Integer: Maximum depth of trees.} \item{lambda_l1}{(Tunable) Numeric: L1 regularization.} \item{lambda_l2}{(Tunable) Numeric: L2 regularization.} \item{min_data_in_leaf}{(Tunable) Positive integer: Minimum number of data in a leaf.} \item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.} \item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.} \item{linear_tree}{(Tunable) Logical: If TRUE, use linear trees.} \item{objective}{Character: Objective function.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ LightCARTHyperparameters object. } \description{ Setup hyperparameters for LightCART training. } \details{ Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}. } \examples{ lightcart_hyperparams <- setup_LightCART(num_leaves = 32L, ifw = FALSE) lightcart_hyperparams } \author{ EDG } ================================================ FILE: man/setup_LightGBM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_LightGBM} \alias{setup_LightGBM} \title{Setup LightGBM Hyperparameters} \usage{ setup_LightGBM( max_nrounds = 1000L, force_nrounds = NULL, early_stopping_rounds = 10L, num_leaves = 8L, max_depth = -1L, learning_rate = 0.01, feature_fraction = 1, subsample = 1, subsample_freq = 1L, lambda_l1 = 0, lambda_l2 = 0, max_cat_threshold = 32L, min_data_per_group = 32L, linear_tree = FALSE, ifw = FALSE, objective = NULL, device_type = "cpu", tree_learner = "serial", force_col_wise = TRUE ) } \arguments{ \item{max_nrounds}{Positive integer: Maximum number of boosting rounds.} \item{force_nrounds}{Positive integer: Use this many boosting rounds. Disable search for nrounds.} \item{early_stopping_rounds}{Positive integer: Number of rounds without improvement to stop training.} \item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.} \item{max_depth}{(Tunable) Integer: Maximum depth of trees.} \item{learning_rate}{(Tunable) Numeric: Learning rate.} \item{feature_fraction}{(Tunable) Numeric: Fraction of features to use.} \item{subsample}{(Tunable) Numeric: Fraction of data to use.} \item{subsample_freq}{(Tunable) Positive integer: Frequency of subsample.} \item{lambda_l1}{(Tunable) Numeric: L1 regularization.} \item{lambda_l2}{(Tunable) Numeric: L2 regularization.} \item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.} \item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.} \item{linear_tree}{Logical: If TRUE, use linear trees.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} \item{objective}{Character: Objective function.} \item{device_type}{Character: "cpu" or "gpu".} \item{tree_learner}{Character: "serial", "feature", "data", or "voting".} \item{force_col_wise}{Logical: Use only with CPU - If TRUE, force col-wise histogram building.} } \value{ LightGBMHyperparameters object. } \description{ Setup hyperparameters for LightGBM training. } \details{ Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}. } \examples{ lightgbm_hyperparams <- setup_LightGBM( max_nrounds = 500L, learning_rate = c(0.001, 0.01, 0.05), ifw = TRUE ) lightgbm_hyperparams } \author{ EDG } ================================================ FILE: man/setup_LightRF.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_LightRF} \alias{setup_LightRF} \title{Setup LightRF Hyperparameters} \usage{ setup_LightRF( nrounds = 500L, num_leaves = 4096L, max_depth = -1L, feature_fraction = 0.7, subsample = 0.623, lambda_l1 = 0, lambda_l2 = 0, max_cat_threshold = 32L, min_data_per_group = 32L, linear_tree = FALSE, ifw = FALSE, objective = NULL, device_type = "cpu", tree_learner = "serial", force_col_wise = TRUE ) } \arguments{ \item{nrounds}{(Tunable) Positive integer: Number of boosting rounds.} \item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.} \item{max_depth}{(Tunable) Integer: Maximum depth of trees.} \item{feature_fraction}{(Tunable) Numeric: Fraction of features to use.} \item{subsample}{(Tunable) Numeric: Fraction of data to use.} \item{lambda_l1}{(Tunable) Numeric: L1 regularization.} \item{lambda_l2}{(Tunable) Numeric: L2 regularization.} \item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.} \item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.} \item{linear_tree}{Logical: If TRUE, use linear trees.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} \item{objective}{Character: Objective function.} \item{device_type}{Character: "cpu" or "gpu".} \item{tree_learner}{Character: "serial", "feature", "data", or "voting".} \item{force_col_wise}{Logical: Use only with CPU - If TRUE, force col-wise histogram building.} } \value{ LightRFHyperparameters object. } \description{ Setup hyperparameters for LightRF training. } \details{ Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}. Note that hyperparameters subsample_freq and early_stopping_rounds are fixed, and cannot be set because they are what makes \code{lightgbm} train a random forest. These can all be set when training gradient boosting with LightGBM. } \examples{ lightrf_hyperparams <- setup_LightRF(nrounds = 1000L, ifw = FALSE) lightrf_hyperparams } \author{ EDG } ================================================ FILE: man/setup_LightRuleFit.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_LightRuleFit} \alias{setup_LightRuleFit} \title{Setup LightRuleFit Hyperparameters} \usage{ setup_LightRuleFit( nrounds = 200L, num_leaves = 32L, max_depth = 4L, learning_rate = 0.1, subsample = 0.666, subsample_freq = 1L, lambda_l1 = 0, lambda_l2 = 0, objective = NULL, ifw_lightgbm = FALSE, alpha = 1, lambda = NULL, ifw_glmnet = FALSE, ifw = FALSE ) } \arguments{ \item{nrounds}{(Tunable) Positive integer: Number of boosting rounds.} \item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.} \item{max_depth}{(Tunable) Integer: Maximum depth of trees.} \item{learning_rate}{(Tunable) Numeric: Learning rate.} \item{subsample}{(Tunable) Numeric: Fraction of data to use.} \item{subsample_freq}{(Tunable) Positive integer: Frequency of subsample.} \item{lambda_l1}{(Tunable) Numeric: L1 regularization.} \item{lambda_l2}{(Tunable) Numeric: L2 regularization.} \item{objective}{Character: Objective function.} \item{ifw_lightgbm}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the LightGBM step.} \item{alpha}{(Tunable) Numeric: Alpha for GLMNET.} \item{lambda}{Numeric: Lambda for GLMNET.} \item{ifw_glmnet}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the GLMNET step.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification. This applies IFW to both LightGBM and GLMNET.} } \value{ LightRuleFitHyperparameters object. } \description{ Setup hyperparameters for LightRuleFit training. } \details{ Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}. } \examples{ lightrulefit_hyperparams <- setup_LightRuleFit(nrounds = 300L, max_depth = 3L) lightrulefit_hyperparams } \author{ EDG } ================================================ FILE: man/setup_LinearSVM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_LinearSVM} \alias{setup_LinearSVM} \title{Setup LinearSVM Hyperparameters} \usage{ setup_LinearSVM(cost = 1, ifw = FALSE) } \arguments{ \item{cost}{(Tunable) Numeric: Cost of constraints violation.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ LinearSVMHyperparameters object. } \description{ Setup hyperparameters for LinearSVM training. } \details{ Get more information from \link[e1071:svm]{e1071::svm}. } \examples{ linear_svm_hyperparams <- setup_LinearSVM(cost = 0.5, ifw = TRUE) linear_svm_hyperparams } \author{ EDG } ================================================ FILE: man/setup_NMF.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_NMF} \alias{setup_NMF} \title{Setup NMF config.} \usage{ setup_NMF(k = 2L, method = "brunet", nrun = if (length(k) > 1L) 30L else 1L) } \arguments{ \item{k}{Integer: Number of components.} \item{method}{Character: NMF method. See \code{NMF::nmf}.} \item{nrun}{Integer: Number of runs to perform.} } \value{ NMFConfig object. } \description{ Setup NMF config. } \examples{ nmf_config <- setup_NMF(k = 3L) nmf_config } \author{ EDG } ================================================ FILE: man/setup_NeuralGas.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/09_ClusteringConfig.R \name{setup_NeuralGas} \alias{setup_NeuralGas} \title{Setup NeuralGasConfig} \usage{ setup_NeuralGas(k = 3L, dist = c("euclidean", "manhattan")) } \arguments{ \item{k}{Number of clusters.} \item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.} } \value{ NeuralGasConfig object. } \description{ Setup NeuralGasConfig } \examples{ neuralgas_config <- setup_NeuralGas(k = 4L, dist = "euclidean") neuralgas_config } \author{ EDG } ================================================ FILE: man/setup_PCA.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_PCA} \alias{setup_PCA} \title{Setup PCA config.} \usage{ setup_PCA(k = 3L, center = TRUE, scale = TRUE, tol = NULL) } \arguments{ \item{k}{Integer: Number of components. (passed to \code{prcomp} \code{rank.})} \item{center}{Logical: If TRUE, center the data.} \item{scale}{Logical: If TRUE, scale the data.} \item{tol}{Numeric: Tolerance.} } \value{ PCAConfig object. } \description{ Setup PCA config. } \examples{ pca_config <- setup_PCA(k = 3L) pca_config } \author{ EDG } ================================================ FILE: man/setup_Preprocessor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/04_Preprocessor.R \name{setup_Preprocessor} \alias{setup_Preprocessor} \title{Setup Preprocessor} \usage{ setup_Preprocessor( complete_cases = FALSE, remove_features_thres = NULL, remove_cases_thres = NULL, missingness = FALSE, impute = FALSE, impute_type = c("missRanger", "micePMM", "meanMode"), impute_missRanger_params = list(pmm.k = 3, maxiter = 10, num.trees = 500), impute_discrete = "get_mode", impute_continuous = "mean", integer2factor = FALSE, integer2numeric = FALSE, logical2factor = FALSE, logical2numeric = FALSE, numeric2factor = FALSE, numeric2factor_levels = NULL, numeric_cut_n = 0, numeric_cut_labels = FALSE, numeric_quant_n = 0, numeric_quant_NAonly = FALSE, unique_len2factor = 0, character2factor = FALSE, factorNA2missing = FALSE, factorNA2missing_level = "missing", factor2integer = FALSE, factor2integer_startat0 = TRUE, scale = FALSE, center = scale, scale_centers = NULL, scale_coefficients = NULL, remove_constants = FALSE, remove_constants_skip_missing = TRUE, remove_features = NULL, remove_duplicates = FALSE, one_hot = FALSE, one_hot_levels = NULL, add_date_features = FALSE, date_features = c("weekday", "month", "year"), add_holidays = FALSE, exclude = NULL ) } \arguments{ \item{complete_cases}{Logical: If TRUE, only retain complete cases (no missing data).} \item{remove_features_thres}{Float (0, 1): Remove features with missing values in >= to this fraction of cases.} \item{remove_cases_thres}{Float (0, 1): Remove cases with >= to this fraction of missing features.} \item{missingness}{Logical: If TRUE, generate new boolean columns for each feature with missing values, indicating which cases were missing data.} \item{impute}{Logical: If TRUE, impute missing cases. See \code{impute_discrete} and \code{impute_continuous}.} \item{impute_type}{Character: Package to use for imputation.} \item{impute_missRanger_params}{Named list with elements "pmm.k" and "maxiter", which are passed to \code{missRanger::missRanger}. \code{pmm.k} greater than 0 results in predictive mean matching. Default \code{pmm.k = 3} \code{maxiter = 10} \code{num.trees = 500}. Reduce \code{num.trees} for faster imputation especially in large datasets. Set \code{pmm.k = 0} to disable predictive mean matching.} \item{impute_discrete}{Character: Name of function that returns single value: How to impute discrete variables for \code{impute_type = "meanMode"}.} \item{impute_continuous}{Character: Name of function that returns single value: How to impute continuous variables for \code{impute_type = "meanMode"}.} \item{integer2factor}{Logical: If TRUE, convert all integers to factors. This includes \code{bit64::integer64} columns.} \item{integer2numeric}{Logical: If TRUE, convert all integers to numeric (will only work if \code{integer2factor = FALSE}). This includes \code{bit64::integer64} columns.} \item{logical2factor}{Logical: If TRUE, convert all logical variables to factors.} \item{logical2numeric}{Logical: If TRUE, convert all logical variables to numeric.} \item{numeric2factor}{Logical: If TRUE, convert all numeric variables to factors.} \item{numeric2factor_levels}{Character vector: Optional - will be passed to \code{levels} arg of \code{factor()} if \code{numeric2factor = TRUE}. For advanced/ specific use cases; need to know unique values of numeric vector(s) and given all numeric vars have same unique values.} \item{numeric_cut_n}{Integer: If > 0, convert all numeric variables to factors by binning using \code{base::cut} with \code{breaks} equal to this number.} \item{numeric_cut_labels}{Logical: The \code{labels} argument of \link[base:cut]{base::cut}.} \item{numeric_quant_n}{Integer: If > 0, convert all numeric variables to factors by binning using \code{base::cut} with \code{breaks} equal to this number of quantiles. produced using \code{stats::quantile}.} \item{numeric_quant_NAonly}{Logical: If TRUE, only bin numeric variables with missing values.} \item{unique_len2factor}{Integer (>=2): Convert all variables with less than or equal to this number of unique values to factors. For example, if binary variables are encoded with 1, 2, you could use \code{unique_len2factor = 2} to convert them to factors.} \item{character2factor}{Logical: If TRUE, convert all character variables to factors.} \item{factorNA2missing}{Logical: If TRUE, make NA values in factors be of level \code{factorNA2missing_level}. In many cases this is the preferred way to handle missing data in categorical variables. Note that since this step is performed before imputation, you can use this option to handle missing data in categorical variables and impute numeric variables in the same \code{preprocess} call.} \item{factorNA2missing_level}{Character: Name of level if \code{factorNA2missing = TRUE}.} \item{factor2integer}{Logical: If TRUE, convert all factors to integers.} \item{factor2integer_startat0}{Logical: If TRUE, start integer coding at 0.} \item{scale}{Logical: If TRUE, scale columns of \code{x}.} \item{center}{Logical: If TRUE, center columns of \code{x}. Note that by default it is the same as \code{scale}.} \item{scale_centers}{Named vector: Centering values for each feature.} \item{scale_coefficients}{Named vector: Scaling values for each feature.} \item{remove_constants}{Logical: If TRUE, remove constant columns.} \item{remove_constants_skip_missing}{Logical: If TRUE, skip missing values, before checking if feature is constant.} \item{remove_features}{Character vector: Features to remove.} \item{remove_duplicates}{Logical: If TRUE, remove duplicate cases.} \item{one_hot}{Logical: If TRUE, convert all factors using one-hot encoding.} \item{one_hot_levels}{List: Named list of the form "feature_name" = "levels". Used when applying one-hot encoding to validation or test data using \code{Preprocessor}.} \item{add_date_features}{Logical: If TRUE, extract date features from date columns.} \item{date_features}{Character vector: Features to extract from dates.} \item{add_holidays}{Logical: If TRUE, extract holidays from date columns.} \item{exclude}{Integer, vector: Exclude these columns from preprocessing.} } \value{ \code{PreprocessorConfig} object. } \description{ Creates a \code{PreprocessorConfig} object, which can be used in \link{preprocess}. } \section{Order of Operations}{ \itemize{ \item keep complete cases only \item remove constants \item remove duplicates \item remove cases by missingness threshold \item remove features by missingness threshold \item integer to factor \item integer to numeric \item logical to factor \item logical to numeric \item numeric to factor \item cut numeric to n bins \item cut numeric to n quantiles \item numeric with less than N unique values to factor \item character to factor \item factor NA to named level \item add missingness column \item impute \item scale and/or center \item one-hot encoding } } \examples{ preproc_config <- setup_Preprocessor(factorNA2missing = TRUE) preproc_config } \author{ EDG } ================================================ FILE: man/setup_RadialSVM.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_RadialSVM} \alias{setup_RadialSVM} \title{Setup RadialSVM Hyperparameters} \usage{ setup_RadialSVM(cost = 1, gamma = 0.01, ifw = FALSE) } \arguments{ \item{cost}{(Tunable) Numeric: Cost of constraints violation.} \item{gamma}{(Tunable) Numeric: Kernel coefficient.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ RadialSVMHyperparameters object. } \description{ Setup hyperparameters for RadialSVM training. } \details{ Get more information from \link[e1071:svm]{e1071::svm}. } \examples{ radial_svm_hyperparams <- setup_RadialSVM(cost = 10, gamma = 0.1, ifw = TRUE) radial_svm_hyperparams } \author{ EDG } ================================================ FILE: man/setup_Ranger.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_Ranger} \alias{setup_Ranger} \title{Setup Ranger Hyperparameters} \usage{ setup_Ranger( num_trees = 500, mtry = NULL, importance = "impurity", write_forest = TRUE, probability = FALSE, min_node_size = NULL, min_bucket = NULL, max_depth = NULL, replace = TRUE, sample_fraction = ifelse(replace, 1, 0.632), case_weights = NULL, class_weights = NULL, splitrule = NULL, num_random_splits = 1, alpha = 0.5, minprop = 0.1, poisson_tau = 1, split_select_weights = NULL, always_split_variables = NULL, respect_unordered_factors = NULL, scale_permutation_importance = FALSE, local_importance = FALSE, regularization_factor = 1, regularization_usedepth = FALSE, keep_inbag = FALSE, inbag = NULL, holdout = FALSE, quantreg = FALSE, time_interest = NULL, oob_error = TRUE, save_memory = FALSE, verbose = TRUE, node_stats = FALSE, seed = NULL, na_action = "na.learn", ifw = FALSE ) } \arguments{ \item{num_trees}{(Tunable) Positive integer: Number of trees.} \item{mtry}{(Tunable) Positive integer: Number of features to consider at each split.} \item{importance}{Character: Variable importance mode. "none", "impurity", "impurity_corrected", "permutation". The "impurity" measure is the Gini index for classification, the variance of the responses for regression.} \item{write_forest}{Logical: Save ranger.forest object, required for prediction. Set to FALSE to reduce memory usage if no prediction intended.} \item{probability}{Logical: Grow a probability forest as in Malley et al. (2012). For classification only.} \item{min_node_size}{(Tunable) Positive integer: Minimal node size. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability.} \item{min_bucket}{Positive integer: Minimal number of samples in a terminal node. Only for survival. Deprecated in favor of min_node_size.} \item{max_depth}{(Tunable) Positive integer: Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree).} \item{replace}{Logical: Sample with replacement.} \item{sample_fraction}{(Tunable) Numeric: Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement.} \item{case_weights}{Numeric vector: Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees.} \item{class_weights}{Numeric vector: Weights for the outcome classes for classification. Vector of the same length as the number of classes, with names corresponding to the class labels.} \item{splitrule}{(Tunable) Character: Splitting rule. For classification: "gini", "extratrees", "hellinger". For regression: "variance", "extratrees", "maxstat", "beta". For survival: "logrank", "extratrees", "C", "maxstat".} \item{num_random_splits}{(Tunable) Positive integer: For "extratrees" splitrule: Number of random splits to consider for each candidate splitting variable.} \item{alpha}{(Tunable) Numeric: For "maxstat" splitrule: significance threshold to allow splitting.} \item{minprop}{(Tunable) Numeric: For "maxstat" splitrule: lower quantile of covariate distribution to be considered for splitting.} \item{poisson_tau}{Numeric: For "poisson" regression splitrule: tau parameter for Poisson regression.} \item{split_select_weights}{Numeric vector: Numeric vector with weights between 0 and 1, representing the probability to select variables for splitting. Alternatively, a list of size num_trees, with one weight vector per tree.} \item{always_split_variables}{Character vector: Character vector with variable names to be always selected in addition to the mtry variables tried for splitting.} \item{respect_unordered_factors}{Character or logical: Handling of unordered factor covariates. For "partition" all 2^(k-1)-1 possible partitions are considered for splitting, where k is the number of factor levels. For "ignore", all factor levels are ordered by their first occurrence in the data. For "order", all factor levels are ordered by their average response. TRUE corresponds to "partition" for the randomForest package compatibility.} \item{scale_permutation_importance}{Logical: Scale permutation importance by standard error as in (Breiman 2001). Only applicable if permutation variable importance mode selected.} \item{local_importance}{Logical: For permutation variable importance, use local importance as in Breiman (2001) and Liaw & Wiener (2002).} \item{regularization_factor}{(Tunable) Numeric: Regularization factor. Penalize variables with many split points. Requires splitrule = "variance".} \item{regularization_usedepth}{Logical: Use regularization factor with node depth. Requires regularization_factor.} \item{keep_inbag}{Logical: Save how often observations are in-bag in each tree. These will be used for (local) variable importance if inbag.counts in predict() is NULL.} \item{inbag}{List: Manually set observations per tree. List of size num_trees, containing inbag counts for each observation. Can be used for stratified sampling.} \item{holdout}{Logical: Hold-out mode. Hold-out all samples with case weight 0 and use these for variable importance and prediction error.} \item{quantreg}{Logical: Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). For regression only. Set keep_inbag = TRUE to prepare out-of-bag quantile prediction.} \item{time_interest}{Numeric: For GWAS data: SNP with this number will be used as time variable. Only for survival. Deprecated, use time.var in formula instead.} \item{oob_error}{Logical: Compute OOB prediction error. Set to FALSE to save computation time if only the forest is needed.} \item{save_memory}{Logical: Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.} \item{verbose}{Logical: Show computation status and estimated runtime.} \item{node_stats}{Logical: Save additional node statistics. Only terminal nodes for now.} \item{seed}{Positive integer: Random seed. Default is NULL, which generates the seed from R. Set to 0 to ignore the R seed.} \item{na_action}{Character: Action to take if the data contains missing values. "na.learn" uses observations with missing values in splitting, treating missing values as a separate category.} \item{ifw}{Logical: Inverse Frequency Weighting for classification. If TRUE, class weights are set inversely proportional to the class frequencies.} } \value{ RangerHyperparameters object. } \description{ Setup hyperparameters for Ranger Random Forest training. } \details{ Get more information from \link[ranger:ranger]{ranger::ranger}. } \examples{ ranger_hyperparams <- setup_Ranger(num_trees = 1000L, ifw = FALSE) ranger_hyperparams } \author{ EDG } ================================================ FILE: man/setup_Resampler.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/05_Resampler.R \name{setup_Resampler} \alias{setup_Resampler} \title{Setup Resampler} \usage{ setup_Resampler( n_resamples = 10L, type = c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"), stratify_var = NULL, train_p = 0.75, strat_n_bins = 4L, target_length = NULL, id_strat = NULL, seed = NULL, verbosity = 1L ) } \arguments{ \item{n_resamples}{Integer: Number of resamples to make.} \item{type}{Character: Type of resampler: "KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"} \item{stratify_var}{Character: Variable to stratify by.} \item{train_p}{Float: Training set percentage.} \item{strat_n_bins}{Integer: Number of bins to stratify by.} \item{target_length}{Integer: Target length for stratified bootstraps.} \item{id_strat}{Integer: Vector of indices to stratify by. These may be, for example, case IDs if your dataset contains repeated measurements. By specifying this vector, you can ensure that each case can only be present in the training or test set, but not both.} \item{seed}{Integer: Random seed.} \item{verbosity}{Integer: Verbosity level.} } \value{ ResamplerConfig object. } \description{ Setup Resampler } \examples{ tenfold_resampler <- setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L) tenfold_resampler } \author{ EDG } ================================================ FILE: man/setup_SuperConfig.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/14_SuperConfig.R \name{setup_SuperConfig} \alias{setup_SuperConfig} \title{Setup SuperConfig} \usage{ setup_SuperConfig( dat_training_path, dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = NULL, algorithm = NULL, hyperparameters = NULL, tuner_config = NULL, outer_resampling_config = NULL, execution_config = setup_ExecutionConfig(), question = NULL, outdir = "results/", verbosity = 1L ) } \arguments{ \item{dat_training_path}{Character: Path to training data file.} \item{dat_validation_path}{Character: Path to validation data file.} \item{dat_test_path}{Character: Path to test data file.} \item{weights}{Optional Character: Column name in training data to use as observation weights. If NULL, no weights are used.} \item{preprocessor_config}{\code{PreprocessorConfig} object: Configuration for data preprocessing.} \item{algorithm}{Character: Algorithm to use for training.} \item{hyperparameters}{\code{Hyperparameters} object: Configuration for model hyperparameters.} \item{tuner_config}{\code{TunerConfig} object: Configuration for hyperparameter tuning.} \item{outer_resampling_config}{\code{ResamplerConfig} object: Configuration for outer res resampling during model training.} \item{execution_config}{\code{ExecutionConfig} object: Configuration for execution settings. Setup with \link{setup_ExecutionConfig}.} \item{question}{Character: Question to answer with the supervised learning analysis.} \item{outdir}{Character: Output directory for results.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{SuperConfig} object. } \description{ Setup \code{SuperConfig} object. } \examples{ sc <- setup_SuperConfig( dat_training_path = "train.csv", preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/" ) } \author{ EDG } ================================================ FILE: man/setup_SuperConfigLive.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/14_SuperConfig.R \name{setup_SuperConfigLive} \alias{setup_SuperConfigLive} \title{Setup SuperConfigLive} \usage{ setup_SuperConfigLive( dat_training, dat_validation = NULL, dat_test = NULL, weights = NULL, preprocessor_config = NULL, algorithm = NULL, hyperparameters = NULL, tuner_config = NULL, outer_resampling_config = NULL, execution_config = setup_ExecutionConfig(), question = NULL, outdir = NULL, verbosity = 1L ) } \arguments{ \item{dat_training}{data.frame or data.table. Training data.} \item{dat_validation}{data.frame, data.table, or \code{NULL}.} \item{dat_test}{data.frame, data.table, or \code{NULL}.} \item{weights}{Character or \code{NULL}. Column name in \code{dat_training} used as observation weights.} \item{preprocessor_config, algorithm, hyperparameters, tuner_config, outer_resampling_config, execution_config, question, verbosity}{See \link{setup_SuperConfig}.} \item{outdir}{Character or \code{NULL}. Output directory; \code{NULL} (the default) means "do not write to disk" (the rtemislive case).} } \value{ \code{SuperConfigLive} object. } \description{ Build a \code{SuperConfigLive} — same shape as \link{setup_SuperConfig} but with in-memory tabular data instead of file paths. } \author{ EDG } ================================================ FILE: man/setup_TabNet.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/02_Hyperparameters.R \name{setup_TabNet} \alias{setup_TabNet} \title{Setup TabNet Hyperparameters} \usage{ setup_TabNet( batch_size = 1024^2, penalty = 0.001, clip_value = NULL, loss = "auto", epochs = 50L, drop_last = FALSE, decision_width = NULL, attention_width = NULL, num_steps = 3L, feature_reusage = 1.3, mask_type = "sparsemax", virtual_batch_size = 256^2, valid_split = 0, learn_rate = 0.02, optimizer = "adam", lr_scheduler = NULL, lr_decay = 0.1, step_size = 30, checkpoint_epochs = 10L, cat_emb_dim = 1L, num_independent = 2L, num_shared = 2L, num_independent_decoder = 1L, num_shared_decoder = 1L, momentum = 0.02, pretraining_ratio = 0.5, device = "auto", importance_sample_size = NULL, early_stopping_monitor = "auto", early_stopping_tolerance = 0, early_stopping_patience = 0, num_workers = 0L, skip_importance = FALSE, ifw = FALSE ) } \arguments{ \item{batch_size}{(Tunable) Positive integer: Batch size.} \item{penalty}{(Tunable) Numeric: Regularization penalty.} \item{clip_value}{Numeric: Clip value.} \item{loss}{Character: Loss function.} \item{epochs}{(Tunable) Positive integer: Number of epochs.} \item{drop_last}{Logical: If TRUE, drop last batch.} \item{decision_width}{(Tunable) Positive integer: Decision width.} \item{attention_width}{(Tunable) Positive integer: Attention width.} \item{num_steps}{(Tunable) Positive integer: Number of steps.} \item{feature_reusage}{(Tunable) Numeric: Feature reusage.} \item{mask_type}{Character: Mask type.} \item{virtual_batch_size}{(Tunable) Positive integer: Virtual batch size.} \item{valid_split}{Numeric: Validation split.} \item{learn_rate}{(Tunable) Numeric: Learning rate.} \item{optimizer}{Character or torch function: Optimizer.} \item{lr_scheduler}{Character or torch function: "step", "reduce_on_plateau".} \item{lr_decay}{Numeric: Learning rate decay.} \item{step_size}{Positive integer: Step size.} \item{checkpoint_epochs}{(Tunable) Positive integer: Checkpoint epochs.} \item{cat_emb_dim}{(Tunable) Positive integer: Categorical embedding dimension.} \item{num_independent}{(Tunable) Positive integer: Number of independent Gated Linear Units (GLU) at each step of the encoder.} \item{num_shared}{(Tunable) Positive integer: Number of shared Gated Linear Units (GLU) at each step of the encoder.} \item{num_independent_decoder}{(Tunable) Positive integer: Number of independent GLU layers for pretraining.} \item{num_shared_decoder}{(Tunable) Positive integer: Number of shared GLU layers for pretraining.} \item{momentum}{(Tunable) Numeric: Momentum.} \item{pretraining_ratio}{(Tunable) Numeric: Pretraining ratio.} \item{device}{Character: Device "cpu" or "cuda".} \item{importance_sample_size}{Positive integer: Importance sample size.} \item{early_stopping_monitor}{Character: Early stopping monitor. "valid_loss", "train_loss", "auto".} \item{early_stopping_tolerance}{Numeric: Minimum relative improvement to reset the patience counter.} \item{early_stopping_patience}{Positive integer: Number of epochs without improving before stopping.} \item{num_workers}{Positive integer: Number of subprocesses for data loacding.} \item{skip_importance}{Logical: If TRUE, skip importance calculation.} \item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.} } \value{ TabNetHyperparameters object. } \description{ Setup hyperparameters for TabNet training. } \examples{ tabnet_hyperparams <- setup_TabNet(epochs = 100L, learn_rate = 0.01) tabnet_hyperparams } \author{ EDG } ================================================ FILE: man/setup_UMAP.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_UMAP} \alias{setup_UMAP} \title{Setup UMAP config.} \usage{ setup_UMAP( k = 2L, n_neighbors = 15L, init = "spectral", metric = c("euclidean", "cosine", "manhattan", "hamming", "categorical"), n_epochs = NULL, learning_rate = 1, scale = TRUE ) } \arguments{ \item{k}{Integer: Number of components.} \item{n_neighbors}{Integer: Number of keighbors.} \item{init}{Character: Initialization type. See \verb{uwot::umap "init"}.} \item{metric}{Character: Distance metric to use: "euclidean", "cosine", "manhattan", "hamming", "categorical".} \item{n_epochs}{Integer: Number of epochs.} \item{learning_rate}{Float: Learning rate.} \item{scale}{Logical: If TRUE, scale input data before doing UMAP.} } \value{ UMAPConfig object. } \description{ Setup UMAP config. } \details{ A high \code{n_neighbors} value may give error in some systems: "Error in irlba::irlba(L, nv = n, nu = 0, maxit = iters) : function 'as_cholmod_sparse' not provided by package 'Matrix'" } \examples{ umap_config <- setup_UMAP(k = 3L) umap_config } \author{ EDG } ================================================ FILE: man/setup_tSNE.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/11_DecompositionConfig.R \name{setup_tSNE} \alias{setup_tSNE} \title{Setup tSNE config.} \usage{ setup_tSNE( k = 2L, initial_dims = 50L, perplexity = 30, theta = 0.5, check_duplicates = TRUE, pca = TRUE, partial_pca = FALSE, max_iter = 1000L, verbose = getOption("verbose", FALSE), is_distance = FALSE, Y_init = NULL, pca_center = TRUE, pca_scale = FALSE, normalize = TRUE, stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L), mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L), momentum = 0.5, final_momentum = 0.8, eta = 200, exaggeration_factor = 12, num_threads = 1L ) } \arguments{ \item{k}{Integer: Number of components.} \item{initial_dims}{Integer: Initial dimensions.} \item{perplexity}{Integer: Perplexity.} \item{theta}{Float: Theta.} \item{check_duplicates}{Logical: If TRUE, check for duplicates.} \item{pca}{Logical: If TRUE, perform PCA.} \item{partial_pca}{Logical: If TRUE, perform partial PCA.} \item{max_iter}{Integer: Maximum number of iterations.} \item{verbose}{Logical: If TRUE, print messages.} \item{is_distance}{Logical: If TRUE, \code{x} is a distance matrix.} \item{Y_init}{Matrix: Initial Y matrix.} \item{pca_center}{Logical: If TRUE, center PCA.} \item{pca_scale}{Logical: If TRUE, scale PCA.} \item{normalize}{Logical: If TRUE, normalize.} \item{stop_lying_iter}{Integer: Stop lying iterations.} \item{mom_switch_iter}{Integer: Momentum switch iterations.} \item{momentum}{Float: Momentum.} \item{final_momentum}{Float: Final momentum.} \item{eta}{Float: Eta.} \item{exaggeration_factor}{Float: Exaggeration factor.} \item{num_threads}{Integer: Number of threads.} } \value{ tSNEConfig object. } \description{ Setup tSNE config. } \details{ Get more information on the config by running \code{?Rtsne::Rtsne}. } \examples{ tSNE_config <- setup_tSNE(k = 3L) tSNE_config } \author{ EDG } ================================================ FILE: man/size.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{size} \alias{size} \title{Size of object} \usage{ size(x, verbosity = 1L) } \arguments{ \item{x}{any object with \code{length()} or \code{dim()}.} \item{verbosity}{Integer: Verbosity level. If > 0, print size to console} } \value{ Integer vector with length equal to the number of dimensions of \code{x}, invisibly. } \description{ Returns the size of an object } \details{ If \code{dim(x)} is NULL, returns \code{length(x)}. } \examples{ x <- rnorm(20) size(x) # 20 x <- matrix(rnorm(100), 20, 5) size(x) # 20 5 } \author{ EDG } ================================================ FILE: man/table_column_attr.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_data.R \name{table_column_attr} \alias{table_column_attr} \title{Tabulate column attributes} \usage{ table_column_attr(x, attr = "source", useNA = "always") } \arguments{ \item{x}{tabular data: Input data set.} \item{attr}{Character: Attribute to get} \item{useNA}{Character: Passed to \code{table}} } \value{ table. } \description{ Tabulate column attributes } \examples{ library(data.table) x <- data.table( id = 1:5, sbp = rnorm(5, 120, 15), dbp = rnorm(5, 80, 10), paO2 = rnorm(5, 90, 10), paCO2 = rnorm(5, 40, 5) ) setattr(x[["sbp"]], "source", "outpatient") setattr(x[["dbp"]], "source", "outpatient") setattr(x[["paO2"]], "source", "icu") setattr(x[["paCO2"]], "source", "icu") table_column_attr(x, "source") } \author{ EDG } ================================================ FILE: man/theme.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/theme.R \name{theme_black} \alias{theme_black} \alias{theme_blackgrid} \alias{theme_blackigrid} \alias{theme_darkgray} \alias{theme_darkgraygrid} \alias{theme_darkgrayigrid} \alias{theme_white} \alias{theme_whitegrid} \alias{theme_whiteigrid} \alias{theme_lightgraygrid} \alias{theme_mediumgraygrid} \title{Themes for \verb{draw_*} functions} \usage{ theme_black( bg = "#000000", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = 0.2, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 0.5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_blackgrid( bg = "#000000", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = 0.2, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_blackigrid( bg = "#000000", plot_bg = "#1A1A1A", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_darkgray( bg = "#121212", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = 0.2, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 0.5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_darkgraygrid( bg = "#121212", plot_bg = "transparent", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#404040", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_darkgrayigrid( bg = "#121212", plot_bg = "#202020", fg = "#ffffff", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "transparent", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_white( bg = "#ffffff", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = FALSE, grid_nx = NULL, grid_ny = NULL, grid_col = fg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = fg, tick_alpha = 0.5, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_whitegrid( bg = "#ffffff", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#c0c0c0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_whiteigrid( bg = "#ffffff", plot_bg = "#E6E6E6", fg = "#000000", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = bg, grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "transparent", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_lightgraygrid( bg = "#dfdfdf", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#c0c0c0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) theme_mediumgraygrid( bg = "#b3b3b3", plot_bg = "transparent", fg = "#000000", pch = 16, cex = 1, lwd = 2, bty = "n", box_col = fg, box_alpha = 1, box_lty = 1, box_lwd = 0.5, grid = TRUE, grid_nx = NULL, grid_ny = NULL, grid_col = "#d0d0d0", grid_alpha = 1, grid_lty = 1, grid_lwd = 1, axes_visible = TRUE, axes_col = "transparent", tick_col = "#00000000", tick_alpha = 1, tick_labels_col = fg, tck = -0.01, tcl = NA, x_axis_side = 1, y_axis_side = 2, labs_col = fg, x_axis_line = 0, x_axis_las = 0, x_axis_padj = -1.1, x_axis_hadj = 0.5, y_axis_line = 0, y_axis_las = 1, y_axis_padj = 0.5, y_axis_hadj = 0.5, xlab_line = 1.4, ylab_line = 2, zerolines = TRUE, zerolines_col = fg, zerolines_alpha = 0.5, zerolines_lty = 1, zerolines_lwd = 1, main_line = 0.25, main_adj = 0, main_font = 2, main_col = fg, font_family = getOption("rtemis_font", "Helvetica") ) } \arguments{ \item{bg}{Color: Figure background.} \item{plot_bg}{Color: Plot region background.} \item{fg}{Color: Foreground color used as default for multiple elements like axes and labels, which can be defined separately.} \item{pch}{Integer: Point character.} \item{cex}{Float: Character expansion factor.} \item{lwd}{Float: Line width.} \item{bty}{Character: Box type: "o", "l", "7", "c", "u", or "]", or "n".} \item{box_col}{Box color if \code{bty != "n"}.} \item{box_alpha}{Float: Box alpha.} \item{box_lty}{Integer: Box line type.} \item{box_lwd}{Float: Box line width.} \item{grid}{Logical: If TRUE, draw grid in plot regions.} \item{grid_nx}{Integer: N of vertical grid lines.} \item{grid_ny}{Integer: N of horizontal grid lines.} \item{grid_col}{Grid color.} \item{grid_alpha}{Float: Grid alpha.} \item{grid_lty}{Integer: Grid line type.} \item{grid_lwd}{Float: Grid line width.} \item{axes_visible}{Logical: If TRUE, draw axes.} \item{axes_col}{Axes colors.} \item{tick_col}{Tick color.} \item{tick_alpha}{Float: Tick alpha.} \item{tick_labels_col}{Tick labels' color.} \item{tck}{\code{graphics::parr}'s tck argument: Tick length, can be negative.} \item{tcl}{\code{graphics::parr}'s tcl argument.} \item{x_axis_side}{Integer: Side to place x-axis.} \item{y_axis_side}{Integer: Side to place y-axis.} \item{labs_col}{Labels' color.} \item{x_axis_line}{Numeric: \code{graphics::axis}'s \code{line} argument for the x-axis.} \item{x_axis_las}{Numeric: \code{graphics::axis}'s \code{las} argument for the x-axis.} \item{x_axis_padj}{Numeric: x-axis' \code{padj}: Adjustment for the x-axis tick labels' position.} \item{x_axis_hadj}{Numeric: x-axis' \code{hadj}.} \item{y_axis_line}{Numeric: \code{graphics::axis}'s \code{line} argument for the y-axis.} \item{y_axis_las}{Numeric: \code{graphics::axis}'s \code{las} argument for the y-axis.} \item{y_axis_padj}{Numeric: y-axis' \code{padj}.} \item{y_axis_hadj}{Numeric: y-axis' \code{hadj}.} \item{xlab_line}{Numeric: Line to place \code{xlab}.} \item{ylab_line}{Numeric: Line to place \code{ylab}.} \item{zerolines}{Logical: If TRUE, draw lines on x = 0, y = 0, if within plot limits.} \item{zerolines_col}{Zerolines color.} \item{zerolines_alpha}{Float: Zerolines alpha.} \item{zerolines_lty}{Integer: Zerolines line type.} \item{zerolines_lwd}{Float: Zerolines line width.} \item{main_line}{Float: How many lines away from the plot region to draw title.} \item{main_adj}{Float: How to align title.} \item{main_font}{Integer: 1: Regular, 2: Bold.} \item{main_col}{Title color.} \item{font_family}{Character: Font to be used throughout plot.} } \value{ \code{Theme} object. } \description{ Themes for \verb{draw_*} functions } \examples{ theme <- theme_black(font_family = "Geist") theme } ================================================ FILE: man/to_json.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R \name{to_json} \alias{to_json} \title{Convert to JSON-serializable list} \usage{ to_json(x, ...) } \arguments{ \item{x}{rtemis S7 object.} \item{...}{Additional arguments passed to method.} } \value{ Named list. Pass through \code{jsonlite::toJSON(auto_unbox = TRUE)} for serialization. } \description{ Convert an rtemis S7 object to a named list suitable for \code{jsonlite::toJSON(auto_unbox = TRUE)}. Used by the rtemislive backend to send structured results to the browser frontend without scraping R console output. } \details{ Each output list includes a \code{.class} field equal to the most specific S7 class name, allowing the frontend to dispatch to a class-specific renderer. The default method walks \code{props(x)}, recursing into S7-typed properties and passing through primitive properties as-is. Per-class methods override where the default isn't appropriate (e.g. classes whose props include a \code{data.table}, an opaque model fit, or where some props should be excluded for size or relevance reasons). } \author{ EDG } \keyword{internal} ================================================ FILE: man/train.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/train.R \name{train} \alias{train} \title{Train Supervised Learning Models} \usage{ train( x, dat_validation = NULL, dat_test = NULL, weights = NULL, algorithm = NULL, preprocessor_config = NULL, hyperparameters = NULL, tuner_config = NULL, outer_resampling_config = NULL, execution_config = setup_ExecutionConfig(), question = NULL, outdir = NULL, verbosity = 1L, ... ) } \arguments{ \item{x}{Tabular data, i.e. data.frame, data.table, or tbl_df (tibble): Training set data.} \item{dat_validation}{Tabular data: Validation set data.} \item{dat_test}{Tabular data: Test set data.} \item{weights}{Optional vector of case weights.} \item{algorithm}{Character: Algorithm to use. Can be left NULL, if \code{hyperparameters} is defined.} \item{preprocessor_config}{Optional PreprocessorConfig object: Setup using \link{setup_Preprocessor}.} \item{hyperparameters}{\code{Hyperparameters} object: Setup using one of \verb{setup_*} functions.} \item{tuner_config}{TunerConfig object: Setup using \link{setup_GridSearch}.} \item{outer_resampling_config}{Optional ResamplerConfig object: Setup using \link{setup_Resampler}. This defines the outer resampling method, i.e. the splitting into training and test sets for the purpose of assessing model performance. If NULL, no outer resampling is performed, in which case you might want to use a \code{dat_test} dataset to assess model performance on a single test set.} \item{execution_config}{\code{ExecutionConfig} object: Setup using \link{setup_ExecutionConfig}. This allows you to set backend ("future", "mirai", or "none"), number of workers, and future plan if using \code{backend = "future"}.} \item{question}{Optional character string defining the question that the model is trying to answer.} \item{outdir}{Character, optional: String defining the output directory.} \item{verbosity}{Integer: Verbosity level.} \item{...}{Not used.} } \value{ Object of class \code{Regression(Supervised)}, \code{RegressionRes(SupervisedRes)}, \code{Classification(Supervised)}, or \code{ClassificationRes(SupervisedRes)}. } \description{ Preprocess, tune, train, and test supervised learning models using nested resampling in a single call. } \details{ \strong{Online book & documentation} See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation. \strong{Preprocessing} There are many different stages at which preprocessing could be applied, when running a supervised learning pipeline with nested resampling. Some operations are best done before passing data to \code{train()}: \itemize{ \item Duplicate rows should be removed before resampling, so that duplicates don't end up in different resamples, e.g. one in training and one in test. \item Constant columns should be removed before resampling. A column may appear constant in a small resample, even if it is not constant in the full dataset. Removing it inconsistently will throw an error during prediction. \item All data-dependent preprocessing steps need to be performed on training data only and applied on validation and test data, e.g. scaling, centering, imputation. } User-defined preprocessing through \code{preprocessor_config} is applied on training set data, the learned parameters are stored in the returned Supervised or SupervisedRes object, and the preprocessing is applied on validation and test data. \strong{Binary Classification} For binary classification, the outcome should be a factor where \emph{the 2nd level corresponds to the positive class}. \strong{Resampling} Note that you should not use an outer resampling method with replacement if you will also be using an inner resampling (for tuning). The duplicated cases from the outer resampling may appear both in the training and test sets of the inner resamples, leading to underestimated test error. \strong{Reproducibility} If using \emph{\strong{outer resampling}}, you can set a seed when defining \code{outer_resampling_config}, e.g. \if{html}{\out{
}}\preformatted{outer_resampling_config = setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L) }\if{html}{\out{
}} If using \emph{\strong{tuning with inner resampling}}, you can set a seed when defining \code{tuner_config}, e.g. \if{html}{\out{
}}\preformatted{tuner_config = setup_GridSearch( resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold", seed = 2027L) ) }\if{html}{\out{
}} \strong{Parallelization} There are three levels of parallelization that may be used during training: \enumerate{ \item Algorithm training (e.g. a parallelized learner like LightGBM) \item Tuning (inner resampling, where multiple resamples can be processed in parallel) \item Outer resampling (where multiple outer resamples can be processed in parallel) } The \code{train()} function will automatically manage parallelization depending on: \itemize{ \item The number of workers specified by the user using \code{n_workers} \item Whether the training algorithm supports parallelization itself \item Whether hyperparameter tuning is needed } } \examples{ \donttest{ iris_c_lightRF <- train( iris, algorithm = "LightRF", outer_resampling_config = setup_Resampler(), ) } } \author{ EDG } ================================================ FILE: man/uniprot_get.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_uniprot.R \name{uniprot_get} \alias{uniprot_get} \title{Get protein sequence from UniProt} \usage{ uniprot_get( accession, baseURL = "https://rest.uniprot.org/uniprotkb", verbosity = 1 ) } \arguments{ \item{accession}{Character: UniProt Accession number - e.g. "Q9UMX9"} \item{baseURL}{Character: UniProt rest API base URL. Default = "https://rest.uniprot.org/uniprotkb"} \item{verbosity}{Integer: Verbosity level.} } \value{ List with three elements: Identifier, Annotation, and Sequence. } \description{ Get protein sequence from UniProt } \examples{ \dontrun{ # This gets the sequence from uniprot.org by default mapt <- uniprot_get("Q9UMX9") } } \author{ E.D. Gennatas } ================================================ FILE: man/with_msg_sink.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/msg.R \name{with_msg_sink} \alias{with_msg_sink} \title{Run code with a temporary message sink} \usage{ with_msg_sink(sink, code) } \arguments{ \item{sink}{Sink function or \code{NULL}.} \item{code}{Code to run.} } \value{ The value returned by \code{code}. } \description{ Sets \code{sink} for the duration of \code{code}, restoring the previous sink on exit (including on error). Useful in tests and for short-lived capture. } \examples{ captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { # any msg() / msg0() / msgstart() / msgdone() calls in here are captured } ) } \seealso{ \code{\link[=set_msg_sink]{set_msg_sink()}}, \code{\link[=get_msg_sink]{get_msg_sink()}}. } \author{ EDG } ================================================ FILE: man/write_toml.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/00_S7init.R, R/14_SuperConfig.R \name{write_toml} \alias{write_toml} \title{Write to TOML file} \usage{ write_toml(x, file, overwrite = FALSE, verbosity = 1L) ## S7 method for class write_toml(x, file, overwrite = FALSE, verbosity = 1L) } \arguments{ \item{x}{\code{SuperConfig} object.} \item{file}{Character: Path to output TOML file.} \item{overwrite}{Logical: If TRUE, overwrite existing file.} \item{verbosity}{Integer: Verbosity level.} } \value{ \code{SuperConfig} object, invisibly. } \description{ Write to TOML file } \examples{ x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/", verbosity = 1L ) tmpdir <- tempdir() write_toml(x, file.path(tmpdir, "rtemis.toml")) } \author{ EDG } ================================================ FILE: man/xt_example.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data_xt_example.R \docType{data} \name{xt_example} \alias{xt_example} \title{Example longitudinal dataset} \format{ A data frame with 30 rows and 4 variables: \describe{ \item{patient_id}{Integer: Patient identifier (1-10).} \item{year}{Integer: Year of measurement (2020-2024).} \item{blood_pressure}{Numeric: Systolic blood pressure measurement.} \item{treatment}{Character: Treatment group ("A" or "B").} } } \usage{ xt_example } \description{ A small synthetic dataset demonstrating various participation patterns in longitudinal data, suitable for examples with \code{\link{xtdescribe}}. } \details{ This dataset includes 10 patients measured at up to 5 time points (years 2020-2024). The dataset demonstrates various participation patterns typical in longitudinal studies: \itemize{ \item Complete participation (all time points) \item Early dropout \item Late entry \item Intermittent participation \item Single time point participation } } \examples{ data(xt_example) head(xt_example) summary(xt_example) } \keyword{datasets} ================================================ FILE: man/xtdescribe.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils_xt.R \name{xtdescribe} \alias{xtdescribe} \title{Describe longitudinal dataset} \usage{ xtdescribe(x, id_col = 1, time_col = 2, n_patterns = 9) } \arguments{ \item{x}{data.frame: Longitudinal data with ID and time variables.} \item{id_col}{Integer: The column position of the ID variable.} \item{time_col}{Integer: The column position of the time variable.} \item{n_patterns}{Integer: The number of patterns to display.} } \value{ data.frame: Summary of participation patterns, returned invisibly. } \description{ This function emulates the \code{xtdescribe} function in Stata. } \examples{ # Load example longitudinal dataset data(xt_example) # Describe the longitudinal structure xtdescribe(xt_example) } \author{ EDG } ================================================ FILE: tests/testthat/test_Calibration.R ================================================ # test_Calibration.R # ::rtemis:: # EDG rtemis.org # Key # {Algorithm}[method] Further conditions # Setup ---- # library(rtemis) # library(testthat) library(data.table) # Data ---- ## Regression Data ---- n <- 400 x <- rnormmat(n, 5, seed = 2025) g <- factor(sample(c("A", "B"), n, replace = TRUE)) y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n) datr <- data.table(x, g, y) resr <- resample(datr) datr_train <- datr[resr$Fold_1, ] datr_test <- datr[-resr$Fold_1, ] ## Classification Data ---- ### Binary ---- datc2 <- data.frame( gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)), iris[51:150, ] ) datc2$Species <- factor(datc2$Species) resc2 <- resample(datc2) datc2_train <- datc2[resc2$Fold_1, ] datc2_test <- datc2[-resc2$Fold_1, ] ### 3-class ---- datc3 <- iris resc3 <- resample(datc3) datc3_train <- datc3[resc3$Fold_1, ] datc3_test <- datc3[-resc3$Fold_1, ] ================================================ FILE: tests/testthat/test_CheckData.R ================================================ # test_CheckData.R # ::rtemis:: # 2025- EDG rtemis.org # %% check_data() ---- test_that("check_data() succeeds", { x <- data.frame( a = c(1, 2, 3, NA), b = c("A", "B", "C", "D"), c = c(1.5, NA, 3.5, NA) ) x_cd <- check_data(x, get_na_case_pct = TRUE, get_na_feature_pct = TRUE) expect_s7_class(x_cd, CheckData) expect_equal(x_cd$n_na, 3) expect_equal(x_cd$n_cols_anyna, 2) expect_equal(nrow(x_cd$na_feature_pct), 2) expect_equal(x_cd$na_feature_pct$Feature, c("a", "c")) expect_equal(x_cd$na_feature_pct$Pct_NA, c(0.25, 0.5)) expect_equal(nrow(x_cd$na_case_pct), 2) expect_equal(x_cd$na_case_pct$Case, c(2, 4)) expect_equal(x_cd$na_case_pct$Pct_NA, c(1 / 3, 2 / 3)) }) ================================================ FILE: tests/testthat/test_Clustering.R ================================================ # test_Clustering.R # ::rtemis:: # 2025 EDG rtemis.org # Data ---- x <- iris[, -5] # setup_KMeans ---- test_that("setup_KMeans() succeeds", { expect_s7_class(setup_KMeans(), KMeansConfig) }) # setup_KMeans throws error ---- test_that("setup_KMeans() throws error with bad values or wrong types", { expect_error(setup_KMeans(k = -1L)) expect_error(setup_KMeans(dist = "foo")) }) # cluster KMeans ---- test_that("cluster_KMeans() succeeds", { iris_kmeans <- cluster( x, algorithm = "kmeans", config = setup_KMeans(k = 3L) ) expect_s7_class(iris_kmeans, Clustering) }) # cluster KMeans with k = 10 ---- test_that("cluster_KMeans() with k = 10 succeeds", { skip_if_not_installed("flexclust") iris_kmeans10 <- cluster( x, algorithm = "kmeans", config = setup_KMeans(k = 10L) ) expect_s7_class(iris_kmeans10, Clustering) }) # setup_HardCL ---- test_that("setup_HardCL() succeeds", { expect_s7_class(setup_HardCL(), HardCLConfig) }) # cluster HardCL ---- test_that("cluster_HardCL() succeeds", { skip_if_not_installed("flexclust") iris_hardcl <- cluster( x, algorithm = "HardCL", config = setup_HardCL(k = 3L) ) expect_s7_class(iris_hardcl, Clustering) }) # setup_NeuralGas ---- test_that("setup_NeuralGas() succeeds", { expect_s7_class(setup_NeuralGas(), NeuralGasConfig) }) # cluster NeuralGas ---- test_that("cluster_NeuralGas() succeeds", { skip_if_not_installed("flexclust") iris_neuralgas <- cluster( x, algorithm = "NeuralGas", config = setup_NeuralGas(k = 3L) ) expect_s7_class(iris_neuralgas, Clustering) }) # setup_CMeans ---- test_that("setup_CMeans() succeeds", { expect_s7_class(setup_CMeans(), CMeansConfig) }) # cluster CMeans ---- test_that("cluster_CMeans() succeeds", { skip_if_not_installed("e1071") iris_cmeans <- cluster( x, algorithm = "CMeans", config = setup_CMeans(k = 3L) ) expect_s7_class(iris_cmeans, Clustering) }) # setup_DBSCAN ---- test_that("setup_DBSCAN() succeeds", { expect_s7_class(setup_DBSCAN(), DBSCANConfig) }) # cluster DBSCAN ---- test_that("cluster_DBSCAN() succeeds", { skip_if_not_installed("dbscan") iris_dbscan <- cluster( x, algorithm = "DBSCAN", config = setup_DBSCAN(eps = 0.5, min_points = 5L) ) expect_s7_class(iris_dbscan, Clustering) }) ================================================ FILE: tests/testthat/test_Decomposition.R ================================================ # test_Decomposition.R # ::rtemis:: # 2025 EDG rtemis.org # Data ---- x <- iris[, -5] # PCA ---- test_that("setup_PCA() succeeds", { config <- setup_PCA() expect_s7_class(config, PCAConfig) }) test_that("decomp() PCA succeeds", { iris_pca <- decomp(x, algorithm = "pca", config = setup_PCA()) iris_pca expect_s7_class(iris_pca, Decomposition) }) # ICA ---- test_that("setup_ICA() succeeds", { config <- setup_ICA() expect_s7_class(config, ICAConfig) }) test_that("decomp() ICA succeeds", { skip_if_not_installed("fastICA") iris_ica <- decomp(x, algorithm = "ica", config = setup_ICA()) expect_s7_class(iris_ica, Decomposition) }) # NMF ---- test_that("setup_NMF() succeeds", { config <- setup_NMF() expect_s7_class(config, NMFConfig) }) test_that("decomp() NMF succeeds", { skip_if_not_installed("NMF") iris_nmf <- decomp(x, algorithm = "nmf", config = setup_NMF()) expect_s7_class(iris_nmf, Decomposition) }) # UMAP ---- test_that("setup_UMAP() succeeds", { config <- setup_UMAP() expect_s7_class(config, UMAPConfig) }) test_that("decomp() UMAP succeeds", { skip_if_not_installed("uwot") iris_umap <- decomp(x, algorithm = "umap", config = setup_UMAP()) iris_umap <- decomp( x, algorithm = "umap", config = setup_UMAP(n_neighbors = 20L) ) expect_s7_class(iris_umap, Decomposition) }) # t-SNE ---- test_that("setup_tSNE() succeeds", { config <- setup_tSNE() expect_s7_class(config, tSNEConfig) }) # Test that t-SNE fails with duplicates test_that("decomp() t-SNE fails with duplicates", { skip_if_not_installed("Rtsne") expect_error(decomp(x, algorithm = "tsne")) }) # Test that t-SNE works after removing duplicates test_that("decomp() t-SNE succeeds after removing duplicates", { skip_if_not_installed("Rtsne") xp <- preprocess(x, setup_Preprocessor(remove_duplicates = TRUE)) iris_tsne <- decomp( xp@preprocessed, algorithm = "tsne", config = setup_tSNE() ) expect_s7_class(iris_tsne, Decomposition) }) # Isomap ---- test_that("setup_Isomap() succeeds", { config <- setup_Isomap() expect_s7_class(config, IsomapConfig) }) test_that("decomp() Isomap succeeds", { skip_if_not_installed("vegan") iris_isomap <- decomp(x, algorithm = "isomap", config = setup_Isomap()) expect_s7_class(iris_isomap, Decomposition) }) ================================================ FILE: tests/testthat/test_ExecutionConfig.R ================================================ # test_ExecutionConfig.R # ::rtemis:: # 2026- EDG rtemis.org # library(testthat) # %% ExecutionConfig ---- ec <- ExecutionConfig( backend = "future", n_workers = 4L, future_plan = "multisession" ) ec testthat::test_that("ExecutionConfig() works", { expect_s7_class( ec, ExecutionConfig ) }) # %% setup_ExecutionConfig() ---- ec <- setup_ExecutionConfig( backend = "future", n_workers = 4L, future_plan = "multisession" ) testthat::test_that("setup_ExecutionConfig() works", { expect_s7_class( ec, ExecutionConfig ) }) ================================================ FILE: tests/testthat/test_Hyperparameters.R ================================================ # test_Hyperparameters.R # ::rtemis:: # 2025 EDG rtemis.org # Hyperparameters ---- hpr <- Hyperparameters( algorithm = "Custom", hyperparameters = list(alpha = c(0, 1), beta = 2), tunable_hyperparameters = "alpha", fixed_hyperparameters = "beta" ) test_that("Hyperparameters succeeds", { expect_s7_class(hpr, Hyperparameters) # test that tuned is set correctly expect_identical(hpr@tuned, 0L) }) # CARTHyperparameters ---- test_that("CARTHyperparameters() errors", { expect_error(CARTHyperparameters()) }) # get_hyperparams_need_tuning ---- test_that("get_hyperparams_need_tuning() succeeds", { expect_type(get_hyperparams_need_tuning(hpr), "list") }) # Check printing of hp that need tuning ---- # CARTHyperparameters ---- # setup_CART ---- cart_hpr <- setup_CART( prune_cp = c(.001, .01, .1), minsplit = c(2L, 10L), minbucket = c(1L, 10L) ) test_that("setup_CART() succeeds", { expect_s7_class(cart_hpr, CARTHyperparameters) }) # needs_tuning ---- test_that("needs_tuning() succeeds", { expect_type(needs_tuning(cart_hpr), "logical") }) # GLMNETHyperparameters ---- test_that("GLMNETHyperparameters() errors", { expect_error(GLMNETHyperparameters()) }) # setup_GLMNET ---- test_that("setup_GLMNET() succeeds", { expect_s7_class(setup_GLMNET(), GLMNETHyperparameters) }) # LightCARTHyperparameters ---- test_that("LightCARTHyperparameters() errors", { expect_error(LightCARTHyperparameters()) }) # setup_LightCART ---- test_that("setup_LightCART() succeeds", { expect_s7_class(setup_LightCART(), LightCARTHyperparameters) }) # LightRFHyperparameters ---- test_that("LightRFHyperparameters() errors", { expect_error(LightRFHyperparameters()) }) # setup_LightRF ---- test_that("setup_LightRF() succeeds", { lrf_hpr <- setup_LightRF() lrf_hpr expect_s7_class(lrf_hpr, LightRFHyperparameters) }) # LightGBMHyperparameters ---- test_that("LightGBMHyperparameters() errors", { expect_error(LightGBMHyperparameters()) }) # setup_LightGBM ---- test_that("setup_LightGBM() succeeds", { lgbm_hpr <- setup_LightGBM( num_leaves = c(4, 8, 16), learning_rate = c(.001, .01, .1) ) expect_s7_class(setup_LightGBM(), LightGBMHyperparameters) }) # LightRuleFitHyperparameters ---- test_that("LightRuleFitHyperparameters() errors", { expect_error(LightRuleFitHyperparameters()) }) # setup_LightRuleFit ---- test_that("setup_LightRuleFit() succeeds", { expect_s7_class(setup_LightRuleFit(), LightRuleFitHyperparameters) }) # IsotonicHyperparameters ---- test_that("IsotonicHyperparameters() errors", { expect_error(IsotonicHyperparameters()) }) # setup_Isotonic ---- test_that("setup_Isotonic() succeeds", { expect_s7_class(setup_Isotonic(), IsotonicHyperparameters) }) # RadialSVMHyperparameters ---- test_that("RadialSVMHyperparameters() errors", { expect_error(RadialSVMHyperparameters()) }) # setup_LinearSVM ---- test_that("setup_LinearSVM() succeeds", { expect_s7_class(setup_LinearSVM(), LinearSVMHyperparameters) }) # setup_RadialSVM ---- test_that("setup_RadialSVM() succeeds", { expect_s7_class(setup_RadialSVM(), RadialSVMHyperparameters) }) # TabNetHyperparameters ---- test_that("TabNetHyperparameters() errors", { expect_error(TabNetHyperparameters()) }) # setup_TabNet ---- test_that("setup_TabNet() succeeds", { expect_s7_class(setup_TabNet(), TabNetHyperparameters) }) # setup_Ranger ---- test_that("setup_Ranger() succeeds", { expect_s7_class(setup_Ranger(), RangerHyperparameters) }) ================================================ FILE: tests/testthat/test_Metrics.R ================================================ # test_Metrics.R # ::rtemis:: # 2025 EDG rtemis.org # Regression Data ---- set.seed(2025) true <- rnorm(500) predicted <- true + rnorm(500) / 2 predicted2 <- true + rnorm(500) / 2 # RegressionMetrics ---- reg_metrics <- regression_metrics(true, predicted, sample = "Training") reg_metrics test_that("regression_metrics() succeeds", { expect_s7_class(regression_metrics(true, predicted), RegressionMetrics) }) reg_metrics2 <- regression_metrics(true, predicted2, sample = "Test") # Classification Data ---- true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a")) predicted_prob <- c(0.3, 0.6, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) predicted_prob2 <- c(0.2, 0.52, 0.28, 0.85, 0.64, 0.45, 0.9, 0.78, 0.78, 0.47) # ClassificationMetrics ---- class_metrics1 <- classification_metrics( true_labels, predicted_labels, predicted_prob, sample = "Training" ) class_metrics2 <- classification_metrics( true_labels, predicted_labels, predicted_prob2, sample = "Test" ) test_that("classification_metrics() succeeds", { expect_s7_class(class_metrics1, ClassificationMetrics) expect_s7_class(class_metrics2, ClassificationMetrics) }) # Test that class_metrics2 has higher AUC and lower Brier score than class_metrics1 test_that("classification_metrics() returns correct metrics", { expect_true( class_metrics2@metrics[["Overall"]][["AUC"]] > class_metrics1@metrics[["Overall"]][["AUC"]] ) expect_true( class_metrics2@metrics[["Overall"]][["Brier_Score"]] < class_metrics1@metrics[["Overall"]][["Brier_Score"]] ) }) # RegressionMetricsRes ---- res_metrics <- list(mod1 = reg_metrics, mod2 = reg_metrics2) rmcv <- RegressionMetricsRes( sample = "Test", res_metrics = res_metrics ) rmcv test_that("RegressionMetricsRes() succeeds", { expect_s7_class(rmcv, RegressionMetricsRes) }) # ClassificationMetricsRes ---- res_metrics <- list(mod1 = class_metrics1, mod2 = class_metrics2) cmcv <- ClassificationMetricsRes( sample = "Test", res_metrics = res_metrics ) cmcv test_that("ClassificationMetricsRes() succeeds", { expect_s7_class(cmcv, ClassificationMetricsRes) }) ================================================ FILE: tests/testthat/test_Preprocessor.R ================================================ # test_Preprocessor.R # ::rtemis:: # 2025 EDG rtemis.org # library(testthat) # PreprocessorConfig ---- prp <- setup_Preprocessor() prp testthat::test_that("setup_Preprocessor() succeeds", { expect_s7_class(setup_Preprocessor(), PreprocessorConfig) }) prp <- setup_Preprocessor( remove_constants = TRUE, remove_duplicates = TRUE ) testthat::test_that("setup_Preprocessor() succeeds", { expect_s7_class(prp, PreprocessorConfig) }) # Preprocessor: preprocess(PreprocessorConfig) ---- res <- resample(iris, setup_Resampler(seed = 2025)) iris_train <- iris[res$Fold_1, ] iris_test <- iris[-res$Fold_1, ] iris_Pre <- preprocess( iris_train, setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE) ) test_that("preprocess(x, PreprocessorConfig) succeeds", { expect_s7_class(iris_Pre, Preprocessor) }) iris_Pre iris_Pre@preprocessed iris_Pre@values iris_test_Pre <- preprocess(iris_test, iris_Pre) test_that("preprocess(x, Preprocessor) succeeds", { expect_s7_class(iris_test_Pre, Preprocessor) }) iris_Pre_too <- preprocess( iris_train, setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE), dat_test = iris_test ) test_that("preprocess(x, PreprocessorConfig) succeeds", { expect_s7_class(iris_Pre_too, Preprocessor) }) test_that("preprocess(x, PreprocessorConfig) and preprocess(x, Preprocessor) give same test set", { expect_equal(iris_Pre_too@preprocessed$test, iris_test_Pre@preprocessed) }) # impute meanMode ---- x <- iris # Continuous x[10:15, 1] <- NA # Categorical x[20:25, 5] <- NA xp <- preprocess( x, setup_Preprocessor(impute = TRUE, impute_type = "meanMode") )[["preprocessed"]] test_that("impute meanMode works", { expect_false(anyNA(xp)) }) # Test one_hot ---- n <- 10 x <- rnormmat(n, 5, seed = 2025) g <- factor(sample(c("A", "B"), n, replace = TRUE)) y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n) datr <- data.frame(x, g, y) datr_onehot <- preprocess( datr, setup_Preprocessor(one_hot = TRUE) )[["preprocessed"]] test_that("one_hot.data.frame works", { expect_s3_class(datr_onehot, "data.frame") }) ================================================ FILE: tests/testthat/test_Resampler.R ================================================ # test_Resampler.R # ::rtemis:: # EDG rtemis.org # library(testthat) # StratSubConfig ---- test_that("StratSubConfig succeeds", { rsp <- StratSubConfig( n = 10L, stratify_var = NULL, train_p = .75, strat_n_bins = 4L, id_strat = NULL, seed = NULL ) expect_s7_class(rsp, StratSubConfig) }) # KFoldConfig ---- test_that("KFoldConfig succeeds", { rsp <- KFoldConfig( n = 10L, stratify_var = NULL, strat_n_bins = 4L, id_strat = NULL, seed = NULL ) expect_s7_class(rsp, KFoldConfig) }) # BootstrapConfig ---- test_that("BootstrapConfig succeeds", { rsp <- BootstrapConfig( n = 10L, id_strat = NULL, seed = NULL ) expect_s7_class(rsp, BootstrapConfig) }) # StratBootConfig ---- test_that("StratBootConfig succeeds", { rsp <- StratBootConfig( n = 10L, stratify_var = NULL, train_p = .75, strat_n_bins = 4L, target_length = NULL, id_strat = NULL, seed = NULL ) expect_s7_class(rsp, StratBootConfig) }) # LOOCVConfig ---- test_that("LOOCVConfig succeeds", { rsp <- LOOCVConfig( n = 10L ) expect_s7_class(rsp, LOOCVConfig) }) # CustomConfig ---- test_that("CustomConfig succeeds", { rsp <- CustomConfig( n = 10L ) expect_s7_class(rsp, CustomConfig) }) # setup_Resampler() defaults ---- test_that("setup_Resampler() succeeds", { rsp <- setup_Resampler() expect_s7_class(rsp, ResamplerConfig) }) # setup_Resampler() kfold ---- test_that("setup_Resampler() kfold succeeds", { rsp <- setup_Resampler(type = "KFold") expect_s7_class(rsp, KFoldConfig) }) # setup_Resampler() strat_sub ---- test_that("setup_Resampler() strat_sub succeeds", { rsp <- setup_Resampler(type = "StratSub") expect_s7_class(rsp, StratSubConfig) }) # setup_Resampler() strat_boot ---- test_that("setup_Resampler() strat_boot succeeds", { rsp <- setup_Resampler(type = "StratBoot") expect_s7_class(rsp, StratBootConfig) }) test_that("setup_Resampler() strat_boot fails with invalid train_p", { expect_error( setup_Resampler(type = "StratBoot", train_p = 1) ) }) # setup_Resampler() bootstrap ---- test_that("setup_Resampler() bootstrap succeeds", { rsp <- setup_Resampler(type = "Bootstrap") expect_s7_class(rsp, BootstrapConfig) }) # setup_Resampler() loocv ---- test_that("setup_Resampler() loocv succeeds", { rsp <- setup_Resampler(type = "LOOCV") expect_s7_class(rsp, LOOCVConfig) }) # Resampler ---- test_that("Resampler() succeeds", { res <- Resampler( type = "Custom", resamples = list(), config = setup_Resampler() ) expect_s7_class(res, Resampler) }) # resample() vector ---- ## KFold ---- test_that("resample() vector succeeds", { res <- resample(iris[[1]], setup_Resampler(type = "KFold")) expect_s7_class(res, Resampler) }) ## StratSub ---- test_that("resample() vector succeeds with StratSub", { res <- resample(iris[[1]], setup_Resampler(type = "StratSub")) expect_s7_class(res, Resampler) }) ## StratBoot ---- test_that("resample() vector succeeds with StratBoot", { res <- resample(iris[[1]], setup_Resampler(type = "StratBoot")) expect_s7_class(res, Resampler) }) ## Bootstrap ---- test_that("resample() vector succeeds with Bootstrap", { res <- resample(iris[[1]], setup_Resampler(type = "Bootstrap")) expect_s7_class(res, Resampler) }) ## LOOCV ---- test_that("resample() vector succeeds with LOOCV", { res <- resample(iris[[1]], setup_Resampler(type = "LOOCV")) expect_s7_class(res, Resampler) }) # resample() data.frame ---- test_that("resample() data.frame succeeds", { res <- resample(iris, setup_Resampler()) expect_s7_class(res, Resampler) }) # resample() data.table ---- test_that("resample() data.table succeeds", { res <- resample(as.data.table(iris), setup_Resampler()) expect_s7_class(res, Resampler) }) ================================================ FILE: tests/testthat/test_SuperConfig.R ================================================ # test_SupervisedConfig.R # ::rtemis:: # 2026- EDG rtemis.org # %% SuperConfig ---- test_that("SuperConfig() succeeds", { sc <- SuperConfig( dat_training_path = "train.csv", dat_validation_path = "validation.csv", dat_test_path = "test.csv", weights = NULL, algorithm = "GLMNET", preprocessor_config = setup_Preprocessor(), hyperparameters = setup_GLMNET(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we predict the future from the past?", outdir = "results/", verbosity = 1L ) expect_s7_class(sc, SuperConfig) }) # %% setup_SuperConfig() ---- test_that("setup_SuperConfig() succeeds", { sc <- setup_SuperConfig( dat_training_path = "train.csv", dat_validation_path = "validation.csv", dat_test_path = "test.csv", weights = NULL, preprocessor_config = setup_Preprocessor(), algorithm = "LightGBM", hyperparameters = setup_LightGBM(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we predict the future from the past?", outdir = "models/", verbosity = 1L ) expect_s7_class(sc, SuperConfig) }) # %% train SuperConfig ---- test_that("train() works with SuperConfig", { testthat::skip("For local testing only; requires CSV file") x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/", verbosity = 1L ) mod <- train(x) expect_s7_class(mod, SupervisedRes) }) # %% Test to_toml.SuperConfig ---- test_that("SuperConfig can be written to TOML", { x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/", verbosity = 1L ) toml_str <- to_toml(x) expect_type(toml_str, "character") }) # %% write_toml.SuperConfig & read_config ---- test_that("SuperConfig can be written to TOML", { x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/", verbosity = 1L ) tmpdir <- tempdir() write_toml(x, file.path(tmpdir, "rtemis.toml"), overwrite = TRUE) testthat::expect_true(file.exists(file.path(tmpdir, "rtemis.toml"))) xtoo <- read_config(file.path(tmpdir, "rtemis.toml")) expect_s7_class(xtoo, SuperConfig) }) # %% Test to_yaml.SuperConfig ---- test_that("SuperConfig can be written to YAML", { x <- setup_SuperConfig( dat_training_path = "~/Data/iris.csv", dat_validation_path = NULL, dat_test_path = NULL, weights = NULL, preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE), algorithm = "LightRF", hyperparameters = setup_LightRF(), tuner_config = setup_GridSearch(), outer_resampling_config = setup_Resampler(), execution_config = setup_ExecutionConfig(), question = "Can we tell iris species apart given their measurements?", outdir = "models/", verbosity = 1L ) yaml_str <- to_yaml(x) expect_type(yaml_str, "character") }) ================================================ FILE: tests/testthat/test_SuperConfigLive.R ================================================ # test_SuperConfigLive.R # ::rtemis:: # 2026- EDG rtemis.org # # Tests for `SuperConfigLive` and its `train()` dispatch arm. The wire- # level integration (rtemislive train handler → SuperConfigLive) is # covered in test_rtemislive_dispatch_data_jobs.R / test_rtemislive_serial.R. library(data.table) # Helpers -------------------------------------------------------------------- small_regression_dt <- function(seed = 2030L, n = 60L) { set.seed(seed) dt <- data.table( a = rnorm(n), b = rnorm(n), c = rnorm(n), y = NA_real_ ) dt[, y := a + 0.5 * b + rnorm(n)] dt } # Constructor / props ------------------------------------------------------- test_that("setup_SuperConfigLive returns a SuperConfigLive with expected props", { dt <- small_regression_dt() cfg <- setup_SuperConfigLive( dat_training = dt, algorithm = "glm" ) expect_true(S7_inherits(cfg, SuperConfigLive)) expect_identical(cfg@dat_training, dt) expect_null(cfg@dat_validation) expect_null(cfg@dat_test) expect_equal(cfg@algorithm, "glm") expect_null(cfg@outdir) }) test_that("setup_SuperConfigLive enforces tabular type on dat_training", { expect_error( setup_SuperConfigLive(dat_training = "not a data frame"), regexp = "(class_tabular|tabular|data.frame|data.table)" ) }) test_that("setup_SuperConfigLive accepts a data.frame (not just data.table)", { df <- data.frame(x = 1:5, y = rnorm(5)) cfg <- setup_SuperConfigLive(dat_training = df, algorithm = "glm") expect_s3_class(cfg@dat_training, "data.frame") }) # train() SuperConfigLive dispatch ------------------------------------------ test_that("train(SuperConfigLive) runs end-to-end for a simple GLM regression", { dt <- small_regression_dt(seed = 2031L) cfg <- setup_SuperConfigLive( dat_training = dt, algorithm = "glm", verbosity = 0L ) mod <- train(cfg) expect_true(S7_inherits(mod, Supervised)) expect_equal(mod@algorithm, "GLM") expect_length(mod@predicted_training, nrow(dt)) }) test_that("train(SuperConfigLive) accepts an in-memory validation split", { dt <- small_regression_dt(seed = 2032L) val <- small_regression_dt(seed = 2033L, n = 20L) cfg <- setup_SuperConfigLive( dat_training = dt, dat_validation = val, algorithm = "glm", verbosity = 0L ) mod <- train(cfg) expect_true(S7_inherits(mod, Supervised)) expect_length(mod@predicted_validation, nrow(val)) }) ================================================ FILE: tests/testthat/test_Supervised.R ================================================ # test_Supervised.R # ::rtemis:: # EDG rtemis.org # Key # {Algorithm}[method] Further conditions # Note # We are using very small and simple datasets to reduce runtime. # GLM models are expected to give warnings, including: # - "glm.fit: fitted probabilities numerically 0 or 1 occurred" # - "glm.fit: algorithm did not converge" # %% Packages ---- library(data.table) # Data ---- ## Regression Data ---- n <- 400 x <- rnormmat(n, 5, seed = 2025) g <- factor(sample(c("A", "B"), n, replace = TRUE)) y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n) datr <- data.table(x, g, y) resr <- resample(datr) datr_train <- datr[resr$Fold_1, ] datr_test <- datr[-resr$Fold_1, ] ## Classification Data ---- ### Binary ---- datc2 <- data.frame( gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)), iris[51:150, ] ) datc2$Species <- factor(datc2$Species) resc2 <- resample(datc2) datc2_train <- datc2[resc2$Fold_1, ] datc2_test <- datc2[-resc2$Fold_1, ] ### 3-class ---- datc3 <- iris resc3 <- resample(datc3) datc3_train <- datc3[resc3$Fold_1, ] datc3_test <- datc3[-resc3$Fold_1, ] ### Synthetic binary data where positive class is 10% of the data ---- # set.seed(2025) # n <- 500 # datc2 <- data.frame( # x1 = rnorm(n), # x2 = rnorm(n), # x3 = rnorm(n), # g = factor(sample(c("A", "B"), n, replace = TRUE, prob = c(.1, .9))) # ) # Binary outcome dependent on x2 and g, with levels "neg" and "pos", where "pos" is 10% of the data # datc2$y <- factor(ifelse(datc2$x2 > 0 & datc2$g == "A", "pos", "neg")) # resc2 <- resample(datc2) # datc2_train <- datc2[resc2$Fold_1, ] # datc2_test <- datc2[-resc2$Fold_1, ] # Utils ---- test_that("class_imbalance() works", { expect_type(class_imbalance(outcome(datc2)), "double") }) # --- GLM ------------------------------------------------------------------------------------------ ## {GLM}[train] ---- mod_r_glm <- train( x = datr_train, dat_test = datr_test, algorithm = "glm" ) test_that("train() GLM Regression succeeds", { expect_s7_class(mod_r_glm, Regression) }) test_that("train() GLM standard errors are available", { expect_type(mod_r_glm@se_training, "double") expect_type(mod_r_glm@se_test, "double") }) ## {GLM}[train] Throw error with missing data ---- datr_train_na <- datr_train datr_train_na[10:2, 1] <- NA test_that("train() GLM Regression with missing data throws error", { expect_error( train(x = datr_train_na, dat_test = datr_test, algorithm = "glm") ) }) ## {GLM}[predict] ---- predicted <- predict(mod_r_glm, features(datr_test)) test_that("predict() GLM Regression succeeds", { expect_identical(mod_r_glm@predicted_test, predicted) expect_null(dim(predicted)) }) ## {GLM}[train] ---- resmod_r_glm <- train( x = datr, algorithm = "glm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) test_that("train() Res GLM Regression succeeds", { expect_s7_class(resmod_r_glm, RegressionRes) }) ## {GLM}[train] ---- mod_c_glm <- train( x = datc2_train, dat_test = datc2_test, algorithm = "glm" ) test_that("train() GLM Classification succeeds", { expect_s7_class(mod_c_glm, Classification) }) ## {GLM}[train] IFW ---- mod_c_glm_ifw <- train( x = datc2_train, dat_test = datc2_test, algorithm = "glm", hyperparameters = setup_GLM(ifw = TRUE) ) test_that("train() GLM Classification with IFW succeeds", { expect_s7_class(mod_c_glm_ifw, Classification) }) ## {GLM}[train] ---- resmod_c_glm <- train( x = datc2, algorithm = "glm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) test_that("train() GLM ClassificationRes succeeds", { expect_s7_class(resmod_c_glm, ClassificationRes) }) # --- GLMNET --------------------------------------------------------------------------------------- ## {GLMNET}[train] ---- mod_r_glmnet <- train( x = datr_train, dat_test = datr_test, algorithm = "glmnet", hyperparameters = setup_GLMNET(lambda = 0.01) ) test_that("train() GLMNET Regression with fixed lambda succeeds", { expect_s7_class(mod_r_glmnet, Regression) }) ## {GLMNET}[predict] ---- predicted <- predict(mod_r_glmnet, features(datr_test)) test_that("predict() GLMNET Regression succeeds", { expect_identical(mod_r_glmnet@predicted_test, predicted) expect_null(dim(predicted)) }) ## {GLMNET}[train] auto-lambda grid search using future ---- test_that( paste( "train > tune_GridSearch resets future plan after execution", "train() GLMNET Regression with auto-lambda grid search using future succeeds" ), { # for local testing only, can't assume multisession or multicore are available skip_if_not_installed("futurize") # Simulate user has set plan to multisession with 2 workers # with(future::plan("multisession", workers = 2L), local = TRUE) # Simulate user has set plan to sequential with(future::plan("sequential"), local = TRUE) # Run train with multicore and 4 workers modt_r_glmnet <- train( x = datr_train, dat_test = datr_test, algorithm = "glmnet", hyperparameters = setup_GLMNET(alpha = 1), execution_config = setup_ExecutionConfig( backend = "future", n_workers = 2L, # Limit to 2 workers for CRAN future_plan = "mirai_multisession" # which gets converted to "future.mirai::mirai_multisession" ), verbosity = 2L ) # Check that model trained correctly expect_s7_class(modt_r_glmnet, Regression) # Check that future plan has been reset to "multisession" with 2 workers # expect_equal(rtemis:::identify_plan(), "multisession") # Check that future plan has been reset to "sequential" expect_equal(rtemis:::identify_plan(), "sequential") expect_equal(future::nbrOfWorkers(), 1L) } ) ## {GLMNET}[train] /\Error sequential with >1 worker ---- test_that("sequential with >1 worker throws error", { skip_if_not_installed("futurize") expect_error( modt_r_glmnet <- train( x = datr_train, dat_test = datr_test, algorithm = "glmnet", hyperparameters = setup_GLMNET(alpha = 1), execution_config = setup_ExecutionConfig( backend = "future", future_plan = "sequential", n_workers = 2L ) ) ) }) ## {GLMNET}[train] auto-lambda grid search using mirai ---- test_that("train() GLMNET Regression with auto-lambda grid search using mirai succeeds", { skip_if_not_installed("mirai") modt_r_glmnet <- train( x = datr_train, dat_test = datr_test, algorithm = "glmnet", hyperparameters = setup_GLMNET(alpha = 1), execution_config = setup_ExecutionConfig(backend = "mirai", n_workers = 2L) ) expect_s7_class(modt_r_glmnet, Regression) }) ## {GLMNET}[train] auto-lambda + alpha grid search ---- test_that("train() GLMNET Regression with auto-lambda + alpha grid search succeeds", { modt_r_glmnet <- train( x = datr_train, dat_test = datr_test, algorithm = "glmnet", hyperparameters = setup_GLMNET(alpha = c(0, 1)), execution_config = setup_ExecutionConfig(backend = "none") ) expect_s7_class(modt_r_glmnet, Regression) }) ## {GLMNET}[train] auto-lambda + alpha grid search ---- test_that("train() Res-GLMNET Regression with auto-lambda + alpha grid search succeeds", { resmodt_r_glmnet <- train( x = datr_train, algorithm = "glmnet", hyperparameters = setup_GLMNET(alpha = c(0.5, 1)), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) expect_s7_class(resmodt_r_glmnet, RegressionRes) }) ## {GLMNET}[train] ---- modt_c_glmnet <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_GLMNET(alpha = 1, lambda = 0.01) ) test_that("train() GLMNET Classification succeeds", { expect_s7_class(modt_c_glmnet, Classification) }) ## {GLMNET}[train] Multiclass ---- test_that("train() GLMNET Multiclass Classification succeeds", { modt_c3_glmnet <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_GLMNET(alpha = 1), execution_config = setup_ExecutionConfig(backend = "none") ) expect_s7_class(modt_c3_glmnet, Classification) }) # --- GAM ------------------------------------------------------------------------------------------ ## {GAM}[train] spline & parametric ---- mod_r_gam <- train( x = datr_train, dat_test = datr_test, algorithm = "gam" ) test_that("train() GAM Regression with spline + parametric terms succeeds.", { expect_s7_class(mod_r_gam, Regression) }) ## {GAM}[train] spline only ---- mod_r_gam <- train( x = datr_train[, -6], dat_test = datr_test[, -6], algorithm = "gam" ) test_that("train() GAM Regression with only spline terms succeeds.", { expect_s7_class(mod_r_gam, Regression) }) ## {GAM}[train] parametric only ---- mod_r_gam <- train( x = datr_train[, 6:7], dat_test = datr_test[, 6:7], algorithm = "gam" ) test_that("train() GAM Regression with only parametric terms succeeds.", { expect_s7_class(mod_r_gam, Regression) }) ## {GAM}[train] grid search ---- modt_r_gam <- train( x = datr_train, dat_test = datr_test, algorithm = "gam", hyperparameters = setup_GAM(k = c(3, 5, 7)) ) test_that("train() GAM Regression with grid_search() succeeds", { expect_s7_class(modt_r_gam, Regression) }) ## {GAM}[predict] ---- test_that("predict() GAM Regression works", { expect_error(predicted <- predict(modt_r_gam, datr_test)) predicted <- predict(modt_r_gam, features(datr_test)) expect_identical(modt_r_gam@predicted_test, predicted) }) ## {GAM}[train] ---- resmod_r_gam <- train( x = datr, algorithm = "gam", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) ## {GAM}[train] ---- mod_c_gam <- train( x = datc2_train, dat_test = datc2_test, algorithm = "gam" ) test_that("train() GAM Classification succeeds", { expect_s7_class(mod_c_gam, Classification) }) ## {GAM}[train] IFW ---- mod_c_gam_ifw <- train( x = datc2_train, dat_test = datc2_test, algorithm = "gam", hyperparameters = setup_GAM(ifw = TRUE) ) test_that("train() GAM Classification with IFW succeeds", { expect_s7_class(mod_c_gam_ifw, Classification) }) # --- LinearSVM ------------------------------------------------------------------------------------ ## {LinearSVM}[train] ---- mod_r_svml <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_LinearSVM() ) test_that("train() LinearSVM Regression succeeds", { expect_s7_class(mod_r_svml, Regression) }) ## {LinearSVM}[train] Tuned ---- modt_r_svml <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_LinearSVM(cost = c(1, 10)), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() LinearSVM Regression with tuning succeeds", { expect_s7_class(modt_r_svml, Regression) }) ## {LinearSVM}[train] ---- resmod_r_svml <- train( x = datr, algorithm = "linearsvm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) test_that("train() Res LinearSVM Regression succeeds", { expect_s7_class(resmod_r_svml, RegressionRes) }) ## {LinearSVM}[train] ---- mod_c_linearsvm <- train( x = datc2_train, dat_test = datc2_test, algorithm = "linearsvm" ) test_that("train() LinearSVM Classification succeeds", { expect_s7_class(mod_c_linearsvm, Classification) }) ## {LinearSVM}[train] Multiclass ---- mod_c3_linearsvm <- train( x = datc3_train, dat_test = datc3_test, algorithm = "linearsvm" ) test_that("train() LinearSVM Multiclass Classification succeeds", { expect_s7_class(mod_c3_linearsvm, Classification) }) ## {LinearSVM}[train] ---- resmod_c_linearsvm <- train( x = datc2, algorithm = "linearsvm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res LinearSVM Classification succeeds", { expect_s7_class(resmod_c_linearsvm, ClassificationRes) }) # --- RadialSVM ------------------------------------------------------------------------------------ ## {RadialSVM}[train] ---- mod_r_svmr <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_RadialSVM() ) test_that("train() RadialSVM Regression succeeds", { expect_s7_class(mod_r_svmr, Regression) }) ## {RadialSVM}[train] Tuned ---- modt_r_svmr <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_RadialSVM(cost = c(1, 10, 100)) ) test_that("train() RadialSVM Regression with tuning succeeds", { expect_s7_class(modt_r_svmr, Regression) }) ## {RadialSVM}[train] ---- resmod_r_svmr <- train( x = datr, algorithm = "radialsvm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res RadialSVM Regression succeeds", { expect_s7_class(resmod_r_svmr, RegressionRes) }) ## {RadialSVM}[train] Tuned ---- resmodt_r_svmr <- train( x = datr, hyperparameters = setup_RadialSVM(cost = c(1, 10)), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res RadialSVM Regression with tuning succeeds", { expect_s7_class(resmodt_r_svmr, RegressionRes) }) ## {RadialSVM}[train] ---- mod_c_radialsvm <- train( x = datc2_train, dat_test = datc2_test, algorithm = "radialsvm" ) test_that("train() RadialSVM Classification succeeds", { expect_s7_class(mod_c_radialsvm, Classification) }) ## {RadialSVM}[train] Tuned ---- modt_c_radialsvm <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_RadialSVM(cost = c(1, 10)), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() RadialSVM Classification with tuning succeeds", { expect_s7_class(modt_c_radialsvm, Classification) }) ## {RadialSVM}[train] ---- resmod_c_radialsvm <- train( x = datc2, algorithm = "radialsvm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res RadialSVM Classification succeeds", { expect_s7_class(resmod_c_radialsvm, ClassificationRes) }) ## {RadialSVM}[train] Tuned ---- resmodt_c_radialsvm <- train( x = datc2, hyperparameters = setup_RadialSVM(cost = c(1, 10)), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res RadialSVM Classification with tuning succeeds", { expect_s7_class(resmodt_c_radialsvm, ClassificationRes) }) ## {RadialSVM}[train] Multiclass ---- modt_c3_radialsvm <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_RadialSVM() ) test_that("train() RadialSVM Multiclass Classification succeeds", { expect_s7_class(modt_c3_radialsvm, Classification) }) # --- CART ----------------------------------------------------------------------------------------- ## {CART}[train] ---- mod_r_cart <- train( datr_train, dat_test = datr_test, algorithm = "cart" ) test_that("train() Regression succeeds", { expect_s7_class(mod_r_cart, Regression) }) ## {CART}[train] Grid search ---- ## {CART} Check tuned == 0---- hyperparameters <- setup_CART( maxdepth = c(1, 2, 10), minbucket = c(1L, 4L) ) test_that("tuned field is set correctly", { expect_identical(hyperparameters@tuned, 0L) }) modt_r_cart <- train( datr_train, dat_test = datr_test, hyperparameters = setup_CART(maxdepth = 2:3), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Regression with grid_search() succeeds", { expect_s7_class(modt_r_cart, Regression) }) ## {CART} Check tuned == 1---- test_that("tuned is set correctly", { expect_identical(modt_r_cart@hyperparameters@tuned, 1L) }) ## {CART}[train] ---- resmod_r_cart <- train( x = datr, hyperparameters = setup_CART(), outer_resampling_config = setup_Resampler(3L), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() RegressionRes succeeds", { expect_s7_class(resmod_r_cart, RegressionRes) }) ## {CART}[train] Tuned ---- resmodt_r_cart <- train( x = datr, hyperparameters = setup_CART(maxdepth = 1:2, prune_cp = c(.001, .01)), outer_resampling_config = setup_Resampler(3), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() RegressionRes succeeds", { expect_s7_class(resmodt_r_cart, RegressionRes) }) ## {CART}[train] prune_cp ---- resmod_r_cart <- train( x = datr, hyperparameters = setup_CART(prune_cp = c(.001, .01)), outer_resampling_config = setup_Resampler(3L), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() RegressionRes succeeds", { expect_s7_class(resmod_r_cart, RegressionRes) }) ## {CART}[train] ---- # model <- train_CART(dat_training = datc2_train, dat_test = datc2_test) # model$method #"class" modt_c_cart <- train( x = datc2_train, dat_test = datc2_test, algorithm = "cart", hyperparameters = setup_CART(maxdepth = 1:2) ) test_that("train() CART Classification succeeds", { expect_s7_class(modt_c_cart, Classification) }) ## {CART}[train] IFW ---- mod_c_cart_ifw <- train( x = datc2_train, dat_test = datc2_test, algorithm = "cart", hyperparameters = setup_CART( ifw = TRUE ) ) test_that("train() CART Classification with IFW succeeds", { expect_s7_class(mod_c_cart_ifw, Classification) }) ## {CART}[train] Grid search ---- modt_c_cart_tuned <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_CART( maxdepth = c(1L, 2L) ), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Classification with grid_search() succeeds", { expect_s7_class(modt_c_cart_tuned, Classification) }) ## {CART}[train] ---- # Can be used to test different parallelization methods during tuning resmodt_c_cart <- train( x = datc2, algorithm = "cart", hyperparameters = setup_CART( maxdepth = c(1L, 2L) ), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() CART ClassificationRes succeeds", { expect_s7_class(resmodt_c_cart, ClassificationRes) }) ## {CART}[train] Multiclass ---- modt_c3_cart <- train( x = datc3_train, dat_test = datc3_test, algorithm = "cart" ) test_that("train() CART Multiclass Classification succeeds", { expect_s7_class(modt_c3_cart, Classification) }) # --- LightCART ------------------------------------------------------------------------------------ ## {LightCART}[train] ---- mod_r_lightcart <- train( x = datr_train, dat_test = datr_test, algorithm = "lightcart" ) test_that("train() LightCART Regression succeeds", { expect_s7_class(mod_r_lightcart, Regression) }) mod_r_lightcartlin <- train( x = datr_train, dat_test = datr_test, algorithm = "lightcart", hyperparameters = setup_LightCART( linear_tree = TRUE ) ) test_that("train() LightCART Regression with linear_tree succeeds", { expect_s7_class(mod_r_lightcartlin, Regression) expect_identical( mod_r_lightcartlin@hyperparameters$linear_tree, mod_r_lightcartlin@model$params$linear_tree ) }) ## {LightCART}[train] ---- mod_c_lightcart <- train( x = datc2_train, dat_test = datc2_test, algorithm = "lightcart" ) test_that("train() LightCART Classification succeeds", { expect_s7_class(mod_c_lightcart, Classification) }) ## {LightCART}[train] Multiclass ---- modt_c3_lightcart <- train( x = datc3_train, dat_test = datc3_test, algorithm = "lightcart" ) test_that("train() LightCART Multiclass Classification succeeds", { expect_s7_class(modt_c3_lightcart, Classification) }) # --- LightRF -------------------------------------------------------------------------------------- ## {LightRF}[train] ---- mod_r_lightrf <- train( x = datr_train, dat_test = datr_test, algorithm = "lightrf", hyperparameters = setup_LightRF( nrounds = 20L, lambda_l1 = .1, lambda_l2 = .1 ) ) test_that("train() LightRF Regression with l1, l2 succeeds", { expect_s7_class(mod_r_lightrf, Regression) }) ## {LightRF}[predict] ---- predicted <- predict(mod_r_lightrf, features(datr_test)) test_that("predict() LightRF Regression succeeds", { expect_identical(mod_r_lightrf@predicted_test, predicted) expect_null(dim(predicted)) }) ## {LightRF}[train] Grid search ---- modt_r_lightrf <- train( x = datr_train, dat_test = datr_test, algorithm = "lightrf", hyperparameters = setup_LightRF( nrounds = 20L, lambda_l1 = c(0, .1) ), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() LightRF Regression with l1 tuning succeeds", { expect_s7_class(modt_r_lightrf, Regression) }) ## {LightRF}[train] ---- resmodt_r_lightrf <- train( x = datr, algorithm = "lightrf", hyperparameters = setup_LightRF( nrounds = 20L, lambda_l1 = c(0, 10) ), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res LightRF Regression with l1 tuning succeeds", { expect_s7_class(resmodt_r_lightrf, RegressionRes) }) ## {LightRF}[train] ---- mod_c_lightrf <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_LightRF(nrounds = 20L) ) test_that("train() LightRF Binary Classification succeeds", { expect_s7_class(mod_c_lightrf, Classification) }) ## {LightRF}[predict] ---- predicted_prob_test <- predict(mod_c_lightrf, features(datc2_test)) test_that("predict() LightRF Classification succeeds", { expect_identical(mod_c_lightrf@predicted_prob_test, predicted_prob_test) }) ## {LightRF}[train] Tuned ---- modt_c_lightrf <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_LightRF(nrounds = 20L, max_depth = c(-1, 5)) ) test_that("train() LightRF Binary Classification with tuning succeeds", { expect_s7_class(modt_c_lightrf, Classification) }) ## {LightRF}[train] ---- resmod_c_lightrf <- train( x = datc2, hyperparameters = setup_LightRF(nrounds = 20L), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() LightRF ClassificationRes succeeds", { expect_s7_class(resmod_c_lightrf, ClassificationRes) }) ## {LightRF}[train] Multiclass ---- modt_c3_lightrf <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_LightRF(nrounds = 20L) ) test_that("train() LightRF Multiclass Classification succeeds", { expect_s7_class(modt_c3_lightrf, Classification) }) ## {LightGBM}[train] ---- mod_r_lightgbm <- train( x = datr_train, dat_test = datr_test, algorithm = "lightgbm", hyperparameters = setup_LightGBM( force_nrounds = 50 ) ) test_that("train() LightGBM Regression succeeds", { expect_s7_class(mod_r_lightgbm, Regression) }) ## {LightGBM}[train] Autotune nrounds ---- modt_r_lightgbm <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_LightGBM() ) test_that("train() LightGBM Regression with autotune nrounds succeeds", { expect_s7_class(modt_r_lightgbm, Regression) }) ## {LightGBM}[train] Autotune nrounds ---- resmodt_r_lightgbm <- train( x = datr_train, hyperparameters = setup_LightGBM(max_nrounds = 50L), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res LightGBM Regression with autotune nrounds succeeds", { expect_s7_class(resmodt_r_lightgbm, RegressionRes) }) ## {LightGBM}[train] ---- mod_c_lightgbm <- train( x = datc2_train, dat_test = datc2_test, algorithm = "lightgbm", # hyperparameters = setup_LightGBM( # force_nrounds = 100L # ), tuner_config = setup_GridSearch( resampler_config = setup_Resampler( n_resamples = 3L, type = "KFold" ) ), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() LightGBM Classification succeeds", { expect_s7_class(mod_c_lightgbm, Classification) }) ## {LightGBM}[train] Multiclass ---- modt_c3_lightgbm <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_LightGBM( force_nrounds = 20L ) ) test_that("train() LightGBM Multiclass Classification succeeds", { expect_s7_class(modt_c3_lightgbm, Classification) }) ## {LightRuleFit}[train] ---- mod_r_lightrlft_l1l2 <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_LightRuleFit( nrounds = 50L, lambda_l1 = 10, lambda_l2 = 10, lambda = 0.01 ) ) test_that("train() LightRuleFit Regression with l1, l2 params passed", { expect_s7_class(mod_r_lightrlft_l1l2, Regression) expect_identical( mod_r_lightrlft_l1l2@model@model_lightgbm@model$params$lambda_l1, 10 ) expect_identical( mod_r_lightrlft_l1l2@model@model_lightgbm@model$params$lambda_l2, 10 ) }) ## {LightRuleFit}[train] ---- mod_c_lightrlft <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_LightRuleFit(nrounds = 50L), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() LightRuleFit Binary Classification succeeds", { expect_s7_class(mod_c_lightrlft, Classification) }) ## {TabNet}[train] ---- # Test if lantern is installed if (torch::torch_is_installed()) { mod_r_tabnet <- train( x = datr_train, dat_test = datr_test, algorithm = "tabnet", hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01) ) test_that("train() TabNet Regression succeeds", { expect_s7_class(mod_r_tabnet, Regression) }) } ## {TabNet}[train] ---- if (torch::torch_is_installed()) { mod_c_tabnet <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01) ) test_that("train() TabNet Classification succeeds", { expect_s7_class(mod_c_tabnet, Classification) }) } ## {TabNet}[train] Multiclass ---- if (torch::torch_is_installed()) { modt_c3_tabnet <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01) ) test_that("train() TabNet Multiclass Classification succeeds", { expect_s7_class(modt_c3_tabnet, Classification) }) } ## {Isotonic}[train] ---- x <- rnorm(50) y <- x^5 + rnorm(50) dat <- data.table(x, y) mod_iso <- train(dat, algorithm = "Isotonic") test_that("train() Isotonic Regression succeeds", { expect_s7_class(mod_iso, Regression) }) ## {Isotonic}[train] ---- set.seed(2025) x <- rnorm(200) y <- factor(ifelse(x > mean(x), "b", "a")) x <- x + rnorm(200) / 3 dat <- data.frame(x, y) cmod_iso <- train(dat, algorithm = "Isotonic") test_that("train() Isotonic Classification succeeds", { expect_s7_class(cmod_iso, Classification) }) # --- Ranger --------------------------------------------------------------------------------------- ## {Ranger}[train] ---- mod_r_ranger <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_Ranger(num_trees = 50L) ) test_that("train() Ranger Regression succeeds", { expect_s7_class(mod_r_ranger, Regression) }) ## {Ranger}[train] Grid search ---- modt_r_ranger <- train( x = datr_train, dat_test = datr_test, hyperparameters = setup_Ranger(num_trees = 50L, mtry = c(3, 6)), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Ranger Regression with grid search succeeds", { expect_s7_class(modt_r_ranger, Regression) }) ## {Ranger}[train] ---- resmod_r_ranger <- train( x = datr, hyperparameters = setup_Ranger(num_trees = 5000L), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) test_that("train() Res Ranger Regression succeeds", { expect_s7_class(resmod_r_ranger, RegressionRes) }) ## {Ranger}[train] ---- mod_c_ranger <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_Ranger(num_trees = 10L) ) test_that("train() Ranger Classification succeeds", { expect_s7_class(mod_c_ranger, Classification) }) ## {Ranger}[train] Grid search ---- modt_c_ranger <- train( x = datc2_train, dat_test = datc2_test, hyperparameters = setup_Ranger(num_trees = 10L, mtry = c(2, 4)), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Ranger Classification with grid search succeeds", { expect_s7_class(modt_c_ranger, Classification) }) ## {Ranger}[train] ---- resmod_c_ranger <- train( x = datc2, hyperparameters = setup_Ranger(num_trees = 10L), outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), execution_config = setup_ExecutionConfig(backend = "none") ) test_that("train() Res Ranger Classification succeeds", { expect_s7_class(resmod_c_ranger, ClassificationRes) }) ## {Ranger}[train] Multiclass ---- modt_c3_ranger <- train( x = datc3_train, dat_test = datc3_test, hyperparameters = setup_Ranger(num_trees = 10L) ) test_that("train() Ranger Multiclass Classification succeeds", { expect_s7_class(modt_c3_ranger, Classification) }) # --- Predict SupervisedRes ------------------------------------------------------------------------ ## {CART}[predict] ---- predicted_mean <- predict(resmod_r_cart, newdata = features(datr_test)) test_that("predict() SupervisedRes succeeds", { expect_true(length(predicted_mean) == nrow(datr_test)) }) # --- Calibration ---------------------------------------------------------------------------------- ## {LightRF}[calibrate] ---- # Calibrate mod_c_lightrf trained above model <- mod_c_lightrf predicted_probabilities <- model$predicted_prob_training true_labels <- model$y_training mod_c_lightrf_cal <- calibrate( mod_c_lightrf, predicted_probabilities = mod_c_lightrf$predicted_prob_training, true_labels = mod_c_lightrf$y_training ) test_that("calibrate() succeeds on Classification", { expect_s7_class(mod_c_lightrf_cal, CalibratedClassification) }) ## {LightRF}[predict] ---- newdata <- features(datc2_test) predicted_prob_test_cal <- predict(mod_c_lightrf_cal, newdata = newdata) test_that("predict() CalibratedClassification succeeds", { expect_identical( mod_c_lightrf_cal@predicted_prob_test_calibrated, predicted_prob_test_cal ) }) # --- CalibratedClassificationRes ------------------------------------------------------------------ ## {LightRF}[calibrate] resmod_c_lightrf_cal <- calibrate(resmod_c_lightrf) test_that("calibrate() succeeds on ClassificationRes", { expect_s7_class(resmod_c_lightrf_cal, CalibratedClassificationRes) }) ## {GLM}[describe] ---- test_that("describe.Regression returns character", { desc <- describe(mod_r_glm) expect_type(desc, "character") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred.Supervised creates a plotly object", { p <- plot_true_pred(mod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred creates a plotly object", { p <- plot_true_pred(mod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[present] ---- test_that("present.Supervised creates a plotly object", { p <- present(mod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[describe] ---- test_that("describe.Classification returns character", { desc <- describe(mod_c_glm) expect_type(desc, "character") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred.Classification creates a plotly object", { p <- plot_true_pred(mod_c_glm) expect_s3_class(p, "plotly") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred creates a plotly object", { p <- plot_true_pred(mod_c_glm) expect_s3_class(p, "plotly") }) ## {GLM}[draw_roc] ---- test_that("draw_roc creates a plotly object", { p <- draw_roc( true_labels = list( Training = mod_c_glm@y_training, Test = mod_c_glm@y_test ), predicted_prob = list( Training = mod_c_glm@predicted_prob_training, Test = mod_c_glm@predicted_prob_test ) ) expect_s3_class(p, "plotly") }) test_that("plot_roc.Classification creates a plotly object", { p <- plot_roc(mod_c_glm) expect_s3_class(p, "plotly") }) ## {CART}[plot_roc] Tuned ---- test_that("plot_roc.ClassificationRes creates a plotly object", { p <- plot_roc(resmodt_c_cart) expect_s3_class(p, "plotly") }) ## {GLM}[plot_metric] ---- test_that("plot_metric.SupervisedRes creates a plotly object", { p <- plot_metric(resmod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[plot_metric] ---- test_that("plot_metric.SupervisedRes creates a plotly object", { p <- plot_metric(resmod_c_glm) expect_s3_class(p, "plotly") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred RegressionRes creates a plotly object", { p <- plot_true_pred(resmod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[plot_true_pred] ---- test_that("plot_true_pred ClassificationRes creates a plotly object", { p <- plot_true_pred(resmod_c_glm) expect_s3_class(p, "plotly") }) ## {GLM}[present] ---- test_that("present.Supervised creates a plotly object", { p <- present(mod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[present] ---- test_that("present.Supervised creates a plotly object", { p <- present(mod_c_glm) expect_s3_class(p, "plotly") }) ## {GLM}[present] ---- test_that("present() RegressionRes object creates a plotly object", { p <- present(resmod_r_glm) expect_s3_class(p, "plotly") }) ## {GLM}[present] ---- test_that("present() ClassificationRes object creates a plotly object", { p <- present(resmod_c_glm) expect_s3_class(p, "plotly") }) ## {Multi}[present] ---- test_that("present() multiple RegressionRes objects creates a plotly object", { p <- present(list(resmod_r_glm, resmod_r_cart)) expect_s3_class(p, "plotly") }) ## {Multi}[present] ---- test_that("present() multiple ClassificationRes objects creates a plotly object", { p <- present(list(resmod_c_glm, resmodt_c_cart)) expect_s3_class(p, "plotly") }) ## {Multi}[present] ---- test_that("present() multiple Regression objects creates a plotly object", { p <- present(list(mod_r_glm, mod_r_cart)) expect_s3_class(p, "plotly") }) ## {CART}[plot_varimp] ---- test_that("plot_varimp RegressionRes creates a plotly object", { p <- plot_varimp(resmod_r_cart) expect_s3_class(p, "plotly") }) ## {GLM}[train] Outdir ---- test_that("train saves model to rds successfully", { temp_dir <- withr::local_tempdir() outdir <- file.path(temp_dir, "mod_r_glm") mod_r_glm <- train( x = datr_train, dat_test = datr_test, algorithm = "glm", outdir = outdir ) expect_true(file.exists(file.path(outdir, "train_GLM.rds"))) }) ## {GLM}[train] Outdir ---- test_that("train saves SupervisedRes model to rds successfully", { temp_dir <- withr::local_tempdir() outdir <- file.path(temp_dir, "resmod_r_glm") resmod_r_glm <- train( x = datr, algorithm = "glm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"), outdir = outdir ) expect_true(file.exists(file.path(outdir, "train_GLM.rds"))) }) ## {CART}[repr] Tuned ---- modt_c_cart_repr <- repr(modt_c_cart, output_type = "ansi") test_that("repr() Tuned Classification succeeds", { expect_type(modt_c_cart_repr, "character") }) ## {CART}[repr] Tuned ---- resmodt_c_cart_repr <- repr(resmodt_c_cart, output_type = "ansi") test_that("repr() Tuned ClassificationRes succeeds", { expect_type(resmodt_c_cart_repr, "character") }) ## {CART}[repr] Tuned ---- modt_r_cart_repr <- repr(modt_r_cart, output_type = "ansi") test_that("repr() Tuned Regression succeeds", { expect_type(modt_r_cart_repr, "character") }) ## {CART}[repr] Tuned ---- resmodt_r_cart_repr <- repr(resmodt_r_cart, output_type = "ansi") test_that("repr() Tuned RegressionRes succeeds", { expect_type(resmodt_r_cart_repr, "character") }) # --- Describe & present list of Supervised -------------------------------------------------------- ## {Multi}[describe] List ---- x <- list( modt_c_cart, mod_c_lightrf, mod_c_lightgbm ) out <- describe(x) test_that("describe() list of Classification objects returns character", { expect_type(out, "character") }) ## {Multi}[present] List ---- plt <- present(x) test_that("present() list of Classification objects returns plotly object", { expect_s3_class(plt, "plotly") }) ## {Multi}[describe] List ---- x <- list( mod_r_glmnet, mod_r_svmr, mod_r_lightrf ) out <- describe(x) test_that("describe() list of Regression objects returns character", { expect_type(out, "character") }) ## {Multi}[present] List ---- plt <- present(x) test_that("present() list of Regression objects returns plotly object", { expect_s3_class(plt, "plotly") }) # Describe & present list of SupervisedRes---- ## {Multi}[describe] List ---- x <- list( resmod_c_glm, resmod_c_linearsvm, resmod_c_lightrf ) out <- describe(x) test_that("describe() list of ClassificationRes objects returns character", { expect_type(out, "character") }) ## {Multi}[present] List ---- plt <- present(x) test_that("present() list of ClassificationRes objects returns plotly object", { expect_s3_class(plt, "plotly") }) ## {Multi}[describe] List ---- x <- list( resmod_r_glm, resmod_r_svml, resmodt_r_lightrf ) out <- describe(x) test_that("describe() list of RegressionRes objects returns character", { expect_type(out, "character") }) ## {Multi}[present] List ---- plt <- present(x) test_that("present() list of RegressionRes objects returns plotly object", { expect_s3_class(plt, "plotly") }) # --- CalibratedClassificationRes ------------------------------------------------------------------ ## {GLM}[calibrate] ---- # Using resmod_c_glm from above resmod_c_glm_cal <- calibrate(resmod_c_glm) test_that("calibrate() GLM ClassificationRes succeeds", { expect_s7_class(resmod_c_glm_cal, CalibratedClassificationRes) }) ## {GLM}[predict] ---- test_that("predict() GLM CalibratedClassificationRes succeeds", { predicted_cal <- predict(resmod_c_glm_cal, features(datc2_test)) expect_type(predicted_cal, "double") expect_length(predicted_cal, nrow(datc2_test)) }) ## {CART}[calibrate] ---- # Using resmodt_c_cart from above resmodt_c_cart_cal <- calibrate(resmodt_c_cart) test_that("calibrate() CART ClassificationRes succeeds", { expect_s7_class(resmodt_c_cart_cal, CalibratedClassificationRes) }) ## {CART}[predict] ---- test_that("predict() CART CalibratedClassificationRes succeeds", { predicted_cal <- predict(resmodt_c_cart_cal, features(datc2_test)) expect_type(predicted_cal, "double") expect_length(predicted_cal, nrow(datc2_test)) }) # %% Test preprocessing in train() is applied to test data in predict() ---- ## {GLM}[train] Preprocessing ---- mod_c_glm_pp <- train( x = datc2_train, dat_test = datc2_test, algorithm = "glm", preprocessor = setup_Preprocessor( scale = TRUE, center = TRUE ) ) test_that("train() with preprocessor creates a model with the preprocessor", { expect_s7_class(mod_c_glm_pp, Classification) expect_true(!is.null(mod_c_glm_pp@preprocessor)) }) ================================================ FILE: tests/testthat/test_Theme.R ================================================ # test_Theme.R # ::rtemis:: # 2025 EDG rtemis.org # Theme ---- test_that("Theme succeeds", { expect_s7_class(Theme(), Theme) }) # theme_black ---- test_that("theme_black succeeds", { expect_s7_class(theme_black(), Theme) }) # theme_blackgrid test_that("theme_blackgrid succeeds", { expect_s7_class(theme_blackgrid(), Theme) }) # theme_light ---- test_that("theme_light succeeds", { expect_s7_class(theme_white(), Theme) }) # Test `$` and `[[` methods ---- theme <- theme_darkgraygrid() test_that("Theme$ and Theme[[ succeed", { expect_equal(theme[["fg"]], "#ffffff") expect_equal(theme[["fg"]], theme[["fg"]]) }) ================================================ FILE: tests/testthat/test_Tuner.R ================================================ # test_Tuner.R # ::rtemis:: # 2025 EDG rtemis.org # Note: Tuning is tested in test_Supervised.R with `train()` # TunerConfig ---- tn_pr <- setup_GridSearch() tn_pr desc(tn_pr) test_that("TunerConfig succeeds", { expect_s7_class(TunerConfig(), TunerConfig) }) # setup_GridSearch() ---- test_that("setup_GridSearch() succeeds", { expect_s7_class(setup_GridSearch(), GridSearchConfig) }) ================================================ FILE: tests/testthat/test_checks.R ================================================ # test_checks.R # ::rtemis:: # 2025 EDG rtemis.org # Test do_call ---- test_that("do_call() succeeds", { expect_equal(do_call(sum, list(1, 2, 3)), 6) }) ================================================ FILE: tests/testthat/test_colorsystem.R ================================================ # test_colorsystem.R # ::rtemis:: # 2025 EDG rtemis.org # show_col ---- x <- list( highlight_col = highlight_col, col_object = col_object, col_outer = col_outer, col_tuner = col_tuner, col_info = col_info ) out <- show_col(x, title = "rtemis Color System") test_that("show_col() works", { expect_true(is.character(out)) }) # fmt_gradient ---- out <- fmt_gradient( "Supervised", colors = c(rtemis_teal, rtemis_light_teal), bold = TRUE ) test_that("fmt_gradient() works", { expect_true(is.character(out)) }) ================================================ FILE: tests/testthat/test_draw.R ================================================ # test_draw.R # ::rtemis:: # 2025 EDG rtemis.org # draw_3Dscatter ---- test_that("draw_3Dscatter creates a plotly object and saves file", { # Check whether plotly and kaleido are available in reticulate temp_dir <- withr::local_tempdir() if ( !requireNamespace("reticulate", quietly = TRUE) || !reticulate::py_module_available("plotly") || !reticulate::py_module_available("kaleido") ) { temp_file <- NULL } else { temp_file <- file.path(temp_dir, "draw_3Dscatter.pdf") } # Create the plot with file output p <- draw_3Dscatter( iris, group = iris$Species, theme = theme_darkgraygrid(), filename = temp_file ) # Test that plotly object is created expect_s3_class(p, "plotly") # Test that file was successfully created by plotly/kaleido (only if temp_file is not NULL) if (!is.null(temp_file)) { expect_true(file.exists(temp_file)) # Test that the file has content (not empty) file_info <- file.info(temp_file) expect_true(file_info$size > 0) # Test that it's a valid PDF file (starts with PDF header) file_content <- readBin(temp_file, "raw", n = 4) expect_equal(rawToChar(file_content), "%PDF") } }) # draw_bar ---- test_that("draw_bar creates a plotly object", { p <- draw_bar(VADeaths, legend_xy = c(0, 1)) expect_s3_class(p, "plotly") }) # draw_box ---- test_that("draw_box creates a plotly object", { p <- draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE) expect_s3_class(p, "plotly") }) # draw_calibration ---- test_that("draw_calibration creates a plotly object", { # Create a simple binary classification example set.seed(123) true_labels <- factor(sample(c("A", "B"), size = 100, replace = TRUE)) predicted_prob <- runif(100) p <- draw_calibration(true_labels, predicted_prob) expect_s3_class(p, "plotly") }) # draw_confusion ---- test_that("draw_confusion creates a plotly object", { true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b")) predicted_labels <- factor(c( "a", "b", "a", "b", "b", "a", "b", "b", "b", "a" )) predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39) metrics <- classification_metrics( true_labels, predicted_labels, predicted_prob ) p <- draw_confusion(metrics) expect_s3_class(p, "plotly") }) # draw_dist ---- test_that("draw_dist creates a plotly object", { p <- draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]]) expect_s3_class(p, "plotly") }) # draw_heatmap ---- test_that("draw_heatmap creates a plotly object", { x <- rnormmat(200, 20) xcor <- cor(x) p <- draw_heatmap(xcor) expect_s3_class(p, "plotly") }) # draw_leaflet ---- test_that("draw_leaflet creates a leaflet object", { fips <- c(06075, 42101) population <- c(874961, 1579000) names <- c("SF", "Philly") p <- draw_leaflet(fips, population, names) expect_s3_class(p, "leaflet") }) # draw_pie ---- test_that("draw_pie creates a plotly object", { p <- draw_pie(VADeaths[, 1, drop = FALSE]) expect_s3_class(p, "plotly") }) # draw_protein ---- test_that("draw_protein creates a plotly object", { tau <- c( "M", "A", "E", "P", "R", "Q", "E", "F", "E", "V", "M", "E", "D", "H", "A", "G", "T", "Y", "G", "L" ) p <- draw_protein(tau) expect_s3_class(p, "plotly") }) # draw_pvals ---- test_that("draw_pvals creates a plotly object", { p <- draw_pvals( c(0.01, 0.02, 0.03), xnames = c("Feature1", "Feature2", "Feature3") ) expect_s3_class(p, "plotly") }) # draw_scatter ---- test_that("draw_scatter creates a plotly object", { p <- draw_scatter( iris[["Sepal.Length"]], iris[["Petal.Length"]], group = iris[["Species"]], fit = "gam", se_fit = TRUE ) expect_s3_class(p, "plotly") }) # draw_spectrogram ---- test_that("draw_spectrogram creates a plotly object", { time <- seq(0, 1, length.out = 100) freq <- seq(1, 100, length.out = 100) power <- outer(time, freq, function(t, f) sin(t) * cos(f)) p <- draw_spectrogram( x = time, y = freq, z = power ) expect_s3_class(p, "plotly") }) # draw_survfit ---- test_that("draw_survfit creates a plotly object", { data(cancer, package = "survival") sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung) p <- draw_survfit(sf2) expect_s3_class(p, "plotly") }) # draw_table ---- test_that("draw_table creates a plotly object", { df <- data.frame( Name = c("Alice", "Bob", "Charlie"), Age = c(25, 30, 35), Score = c(90.5, 85.0, 88.0) ) p <- draw_table( df, main = "Sample Table", main_col = "#00b2b2" ) expect_s3_class(p, "plotly") }) # draw_ts ---- test_that("draw_ts creates a plotly object", { time1 <- sample(seq( as.Date("2020-03-01"), as.Date("2020-07-23"), length.out = 100 )) time2 <- sample(seq( as.Date("2020-05-01"), as.Date("2020-09-23"), length.out = 140 )) time <- c(time1, time2) x <- c(rnorm(100), rnorm(140, 1, 1.5)) group <- c(rep("Alpha", 100), rep("Beta", 140)) p <- draw_ts(x, time, 7, group) expect_s3_class(p, "plotly") }) # draw_varimp ---- test_that("draw_varimp creates a plotly object", { x <- rnorm(10) names(x) <- paste0("Feature_", seq(x)) p <- draw_varimp(x) expect_s3_class(p, "plotly") p_h <- draw_varimp(x, orientation = "h") expect_s3_class(p_h, "plotly") }) # draw_volcano ---- test_that("draw_volcano creates a plotly object", { set.seed(2019) x <- rnorm(100, mean = 0.5, sd = 2) pvals <- runif(100, min = 0, max = 0.1) p <- draw_volcano(x, pvals) expect_s3_class(p, "plotly") }) # draw_xt ---- test_that("draw_xt creates a plotly object", { datetime <- seq( as.POSIXct("2020-01-01 00:00"), as.POSIXct("2020-01-02 00:00"), by = "hour" ) df <- data.frame( datetime = datetime, value1 = rnorm(length(datetime)), value2 = rnorm(length(datetime)) ) p <- draw_xt(df, x = df[, 1], y = df[, 2:3]) expect_s3_class(p, "plotly") }) ================================================ FILE: tests/testthat/test_idx.R ================================================ # test_idx.R # ::rtemis:: # 2025 EDG rtemis.org # Packages ---- library(data.table) # Data ---- xdf <- iris xdt <- as.data.table(iris) idx <- c("Sepal.Length", "Species") idi <- c(1L, 5L) # Test inc(data.frame) ---- xdf[, idx, drop = FALSE] xdf[, idi, drop = FALSE] inc(xdf, idx) inc(xdf, idi) test_that("inc(data.frame) works", { expect_equal(xdf[, idx, drop = FALSE], inc(xdf, idx)) expect_equal(xdf[, idi, drop = FALSE], inc(xdf, idi)) expect_equal(inc(xdf, idx), inc(xdf, idi)) }) # Test inc(data.table) ---- xdt[, ..idx] xdt[, idx, with = FALSE] xdt[, ..idi] xdt[, idi, with = FALSE] inc(xdt, idx) inc(xdt, idi) test_that("inc(data.table) works", { expect_equal(xdt[, ..idx], inc(xdt, idx)) expect_equal(xdt[, ..idi], inc(xdt, idi)) expect_equal(inc(xdt, idx), inc(xdt, idi)) }) # Test exc(data.frame) ---- xdf[, -which(names(xdf) %in% idx)] xdf[, -idi] exc(xdf, idx) exc(xdf, idi) test_that("exc(data.frame) works", { expect_equal(xdf[, -which(names(xdf) %in% idx)], exc(xdf, idx)) expect_equal(xdf[, -idi], exc(xdf, idi)) }) # Test exc(data.table) ---- xdt[, !..idx] xdt[, !idx, with = FALSE] xdt[, !..idi] xdt[, !idi, with = FALSE] exc(xdt, idx) test_that("exc(data.table) works", { expect_equal(xdt[, !..idx, with = FALSE], exc(xdt, idx)) }) ================================================ FILE: tests/testthat/test_massGLM.R ================================================ # test_MassGLM.R # ::rtemis:: # 2025 EDG rtemis.org # library(rtemis) # library(data.table) # library(testthat) set.seed(2022) n <- 40L y <- data.table(rnormmat(500, n)) x <- data.table( x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500), x2 = y[[21]] + rnorm(500) ) # massGLM ---- massmod <- massGLM(x, y) test_that("massGLM creates MassGLM object", { expect_s7_class(massmod, MassGLM) }) # plot.MassGLM ---- test_that("plot.MassGLM creates plotly object", { plt <- plot(massmod) expect_s3_class(plt, "plotly") }) # plot_manhattan.MassGLM ---- test_that("plot_manhattan.MassGLM creates plotly object", { plt <- plot_manhattan(massmod) expect_s3_class(plt, "plotly") }) ================================================ FILE: tests/testthat/test_msg_sink.R ================================================ # test_msg_sink.R # ::rtemis:: # 2026- EDG rtemis.org # set_msg_sink ---- test_that("set_msg_sink() accepts a function", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(function(m) NULL) expect_true(is.function(get_msg_sink())) }) test_that("set_msg_sink() accepts NULL", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(function(m) NULL) set_msg_sink(NULL) expect_null(get_msg_sink()) }) test_that("set_msg_sink() rejects non-function, non-NULL inputs", { on.exit(set_msg_sink(NULL), add = TRUE) expect_error(set_msg_sink("a string")) expect_error(set_msg_sink(123)) expect_error(set_msg_sink(list())) }) test_that("set_msg_sink() returns previous sink invisibly", { on.exit(set_msg_sink(NULL), add = TRUE) fn1 <- function(m) NULL fn2 <- function(m) NULL set_msg_sink(NULL) prev1 <- set_msg_sink(fn1) expect_null(prev1) prev2 <- set_msg_sink(fn2) expect_identical(prev2, fn1) }) # get_msg_sink ---- test_that("get_msg_sink() returns NULL when no sink is set", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(NULL) expect_null(get_msg_sink()) }) test_that("get_msg_sink() returns the current sink function", { on.exit(set_msg_sink(NULL), add = TRUE) fn <- function(m) NULL set_msg_sink(fn) expect_identical(get_msg_sink(), fn) }) # with_msg_sink ---- test_that("with_msg_sink() sets and restores", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(NULL) with_msg_sink(function(m) NULL, { expect_true(is.function(get_msg_sink())) }) expect_null(get_msg_sink()) }) test_that("with_msg_sink() restores even on error", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(NULL) expect_error( with_msg_sink(function(m) NULL, stop("boom")), "boom" ) expect_null(get_msg_sink()) }) test_that("with_msg_sink() preserves a previously set outer sink", { on.exit(set_msg_sink(NULL), add = TRUE) outer <- function(m) NULL inner <- function(m) NULL set_msg_sink(outer) with_msg_sink(inner, { expect_identical(get_msg_sink(), inner) }) expect_identical(get_msg_sink(), outer) }) # msg() / msg0() / msgstart() / msgdone() routing ---- test_that("msg() emits a console message when no sink is set", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(NULL) expect_message(msg("hello")) }) test_that("msg() routes to the sink and suppresses console output", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { expect_silent(msg("hello world")) } ) expect_length(captured, 1L) expect_equal(captured[[1L]][["text"]], "hello world") expect_equal(captured[[1L]][["level"]], "info") expect_true(is.character(captured[[1L]][["ts"]])) }) test_that("msg0() routes to the sink with sep = ''", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { expect_silent(msg0("hello", "world")) } ) expect_length(captured, 1L) expect_equal(captured[[1L]][["text"]], "helloworld") expect_equal(captured[[1L]][["level"]], "info") }) test_that("msgstart() routes to the sink with level = 'start'", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { expect_silent(msgstart("Starting...")) } ) expect_length(captured, 1L) expect_equal(captured[[1L]][["text"]], "Starting...") expect_equal(captured[[1L]][["level"]], "start") }) test_that("msgdone() routes to the sink with level = 'done' and a caller", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { expect_silent(msgdone()) } ) expect_length(captured, 1L) expect_equal(captured[[1L]][["level"]], "done") # caller may be NA depending on test harness call stack — assert type only expect_true( is.character(captured[[1L]][["caller"]]) || is.na(captured[[1L]][["caller"]]) ) }) test_that("multiple msg variants accumulate as separate sink events in order", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { msg("one") msg0("two") msgstart("three") msgdone() } ) expect_length(captured, 4L) expect_equal( vapply(captured, `[[`, character(1L), "level"), c("info", "info", "start", "done") ) expect_equal(captured[[1L]][["text"]], "one") expect_equal(captured[[2L]][["text"]], "two") expect_equal(captured[[3L]][["text"]], "three") }) test_that("verbosity = 0 short-circuits before reaching the sink", { captured <- list() with_msg_sink( function(m) captured[[length(captured) + 1L]] <<- m, { msg("ignored", verbosity = 0L) msg0("ignored", verbosity = 0L) } ) expect_length(captured, 0L) }) test_that("clearing the sink restores console output", { on.exit(set_msg_sink(NULL), add = TRUE) set_msg_sink(function(m) NULL) expect_silent(msg("under sink")) set_msg_sink(NULL) expect_message(msg("after clear")) }) ================================================ FILE: tests/testthat/test_strings.R ================================================ # test_strings.R # ::rtemis:: # 2025 EDG rtemis.org # repr_ls ---- x <- list( a = 1:5, b = letters[1:5], c = rnorm(5) ) out <- repr_ls(x, title = "Test List") test_that("repr_ls() works", { expect_true(is.character(out)) }) ## Long list ---- x <- list( a = 1:100, b = letters[1:100], c = iris, d = sample(letters, 100, replace = TRUE), e = runif(100), f = setup_Preprocessor(), g = rpois(100, 2), h = rbinom(100, 10, 0.5), i = setup_PCA(), j = rnorm(100), k = rnorm(100), l = setup_LightCART() ) test_that("repr_ls() handles long lists", { expect_true(is.character(repr_ls(x, limit = 5L))) expect_true(is.character(repr_ls(x, limit = -1L))) }) ================================================ FILE: tests/testthat/test_to_json.R ================================================ # test_to_json.R # ::rtemis:: # 2026- EDG rtemis.org skip_if_not_installed("jsonlite") library(data.table) # Data ---- n <- 100L x <- rnormmat(n, 3L, seed = 2026L) y <- x[, 1L] + x[, 2L] + rnorm(n) datr <- data.table(x, y) # Generic ---- test_that("to_json() is a registered S7 generic", { expect_true(inherits(to_json, "S7_generic")) }) # Supervised (Regression) ---- mod_r_glm <- train(x = datr, algorithm = "glm") test_that("to_json(Regression) returns a list with .class and core fields", { j <- to_json(mod_r_glm) expect_type(j, "list") expect_equal(j[[".class"]], "Regression") expect_equal(j[["type"]], "Regression") expect_true(is.character(j[["xnames"]])) expect_equal(j[["n_features"]], length(mod_r_glm@xnames)) }) test_that("to_json(Regression) recurses into nested S7 props with .class tags", { j <- to_json(mod_r_glm) expect_true(is.list(j[["metrics_training"]])) expect_true(".class" %in% names(j[["metrics_training"]])) expect_true(is.list(j[["execution_config"]])) expect_equal(j[["execution_config"]][[".class"]], "ExecutionConfig") }) test_that("to_json(Regression) is JSON-serializable and round-trips", { j <- to_json(mod_r_glm) txt <- jsonlite::toJSON(j, auto_unbox = TRUE, na = "null", null = "null") expect_true(jsonlite::validate(txt)) parsed <- jsonlite::fromJSON(txt, simplifyVector = FALSE) expect_equal(parsed[[".class"]], "Regression") expect_equal(parsed[["type"]], "Regression") }) test_that("to_json(Regression) excludes model, raw vectors, session_info", { j <- to_json(mod_r_glm) expect_false("model" %in% names(j)) expect_false("y_training" %in% names(j)) expect_false("predicted_training" %in% names(j)) expect_false("session_info" %in% names(j)) expect_false("extra" %in% names(j)) }) test_that("to_json(Regression) drops NULL fields cleanly", { j <- to_json(mod_r_glm) expect_true(all(vapply(j, function(v) !is.null(v), logical(1L)))) }) # Supervised (Classification) ---- datc <- data.frame(iris[51:150, ]) datc$Species <- factor(datc$Species) mod_c_glm <- train(x = datc, algorithm = "glm") test_that("to_json(Classification) tags class and includes binclasspos", { j <- to_json(mod_c_glm) expect_equal(j[[".class"]], "Classification") expect_true("binclasspos" %in% names(j)) expect_true(is.integer(j[["binclasspos"]])) }) # SupervisedRes ---- resmod <- train( x = datr, algorithm = "glm", outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold") ) test_that("to_json(RegressionRes) returns a list with .class and resample summary", { j <- to_json(resmod) expect_type(j, "list") expect_equal(j[[".class"]], "RegressionRes") expect_equal(j[["n_resamples"]], 3L) expect_true(is.list(j[["outer_resampler"]])) expect_true(is.list(j[["metrics_training"]])) expect_true(is.list(j[["metrics_test"]])) }) test_that("to_json(RegressionRes) is JSON-serializable", { j <- to_json(resmod) txt <- jsonlite::toJSON(j, auto_unbox = TRUE, na = "null", null = "null") expect_true(jsonlite::validate(txt)) parsed <- jsonlite::fromJSON(txt, simplifyVector = FALSE) expect_equal(parsed[[".class"]], "RegressionRes") expect_equal(parsed[["n_resamples"]], 3L) }) test_that("to_json(RegressionRes) excludes models list (only summary count)", { j <- to_json(resmod) expect_false("models" %in% names(j)) }) # Default method ---- test_that("default to_json walks props and tags .class for arbitrary S7 objects", { exec <- setup_ExecutionConfig() j <- to_json(exec) expect_type(j, "list") expect_equal(j[[".class"]], "ExecutionConfig") }) ================================================ FILE: tests/testthat.R ================================================ library(rtemis) library(testthat) test_check("rtemis")