Showing preview only (1,516K chars total). Download the full file or copy to clipboard to get everything.
Repository: egenn/rtemis
Branch: main
Commit: ee040e1ef75f
Files: 296
Total size: 1.4 MB
Directory structure:
gitextract_jpidflln/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ ├── CONTRIBUTING.md
│ └── workflows/
│ └── R-CMD-check.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R/
│ ├── 00_S7init.R
│ ├── 01_ExecutionConfig.R
│ ├── 02_Hyperparameters.R
│ ├── 03_Metrics.R
│ ├── 04_Preprocessor.R
│ ├── 05_Resampler.R
│ ├── 06_Tuner.R
│ ├── 07_Supervised.R
│ ├── 08_MassUni.R
│ ├── 09_ClusteringConfig.R
│ ├── 10_Clustering.R
│ ├── 11_DecompositionConfig.R
│ ├── 12_Decomposition.R
│ ├── 13_Themes.R
│ ├── 14_SuperConfig.R
│ ├── 15_CheckData.R
│ ├── 16_S7utils.R
│ ├── algorithmDB.R
│ ├── calibrate.R
│ ├── check_data.R
│ ├── check_input_data.R
│ ├── cluster.R
│ ├── cluster_CMeans.R
│ ├── cluster_DBSCAN.R
│ ├── cluster_flexclust.R
│ ├── data_xt_example.R
│ ├── ddSci.R
│ ├── ddb.R
│ ├── decomp.R
│ ├── decomp_ICA.R
│ ├── decomp_Isomap.R
│ ├── decomp_NMF.R
│ ├── decomp_PCA.R
│ ├── decomp_UMAP.R
│ ├── decomp_tSNE.R
│ ├── draw_3Dscatter.R
│ ├── draw_bar.R
│ ├── draw_box.R
│ ├── draw_calibration.R
│ ├── draw_confusion.R
│ ├── draw_dist.R
│ ├── draw_graphd3.R
│ ├── draw_graphjs.R
│ ├── draw_heatmap.R
│ ├── draw_leaflet.R
│ ├── draw_pie.R
│ ├── draw_protein.R
│ ├── draw_pvals.R
│ ├── draw_roc.R
│ ├── draw_scatter.R
│ ├── draw_spectrogram.R
│ ├── draw_survfit.R
│ ├── draw_table.R
│ ├── draw_ts.R
│ ├── draw_varimp.R
│ ├── draw_volcano.R
│ ├── draw_xt.R
│ ├── fmt.R
│ ├── ifw.R
│ ├── massGLM.R
│ ├── metrics.R
│ ├── msg.R
│ ├── preprocess.R
│ ├── present.R
│ ├── read.R
│ ├── resample.R
│ ├── rtemis-package.R
│ ├── rtemis_color_system.R
│ ├── theme.R
│ ├── train.R
│ ├── train_CART.R
│ ├── train_GAM.R
│ ├── train_GLM.R
│ ├── train_GLMNET.R
│ ├── train_Isotonic.R
│ ├── train_LightCART.R
│ ├── train_LightGBM.R
│ ├── train_LightRF.R
│ ├── train_LightRuleFit.R
│ ├── train_Ranger.R
│ ├── train_SVM.R
│ ├── train_TabNet.R
│ ├── tune.R
│ ├── tune_GridSearch.R
│ ├── utils.R
│ ├── utils_art.R
│ ├── utils_async.R
│ ├── utils_checks.R
│ ├── utils_color.R
│ ├── utils_data.R
│ ├── utils_data.table.R
│ ├── utils_date.R
│ ├── utils_df.R
│ ├── utils_exec.R
│ ├── utils_files.R
│ ├── utils_html.R
│ ├── utils_io.R
│ ├── utils_lightgbm.R
│ ├── utils_palettes.R
│ ├── utils_plot.R
│ ├── utils_plotly.R
│ ├── utils_print.R
│ ├── utils_rt.R
│ ├── utils_rules.R
│ ├── utils_strings.R
│ ├── utils_supervised.R
│ ├── utils_uniprot.R
│ ├── utils_xt.R
│ └── zzz.R
├── README.md
├── data/
│ └── xt_example.rda
├── data-raw/
│ └── create_xt_example.R
├── inst/
│ ├── CITATION
│ ├── extdata/
│ │ ├── us-counties.rds
│ │ └── us-states.rds
│ └── resources/
│ ├── aminoacids.rds
│ ├── rtemis.utf8
│ └── rtemis2.utf8
├── man/
│ ├── available_algorithms.Rd
│ ├── available_draw.Rd
│ ├── available_themes.Rd
│ ├── calibrate.Rd
│ ├── check_data.Rd
│ ├── choose_theme.Rd
│ ├── class_imbalance.Rd
│ ├── classification_metrics.Rd
│ ├── clean_colnames.Rd
│ ├── clean_names.Rd
│ ├── cluster.Rd
│ ├── col2grayscale.Rd
│ ├── color_adjust.Rd
│ ├── ddSci.Rd
│ ├── ddb_collect.Rd
│ ├── ddb_data.Rd
│ ├── decomp.Rd
│ ├── describe.Rd
│ ├── df_movecolumn.Rd
│ ├── df_nunique_perfeat.Rd
│ ├── dot-list_to_Hyperparameters.Rd
│ ├── dot-list_to_ResamplerConfig.Rd
│ ├── dot-list_to_TunerConfig.Rd
│ ├── draw_3Dscatter.Rd
│ ├── draw_bar.Rd
│ ├── draw_box.Rd
│ ├── draw_calibration.Rd
│ ├── draw_confusion.Rd
│ ├── draw_dist.Rd
│ ├── draw_fit.Rd
│ ├── draw_graphD3.Rd
│ ├── draw_graphjs.Rd
│ ├── draw_heatmap.Rd
│ ├── draw_leaflet.Rd
│ ├── draw_pie.Rd
│ ├── draw_protein.Rd
│ ├── draw_pvals.Rd
│ ├── draw_roc.Rd
│ ├── draw_scatter.Rd
│ ├── draw_spectrogram.Rd
│ ├── draw_survfit.Rd
│ ├── draw_table.Rd
│ ├── draw_ts.Rd
│ ├── draw_varimp.Rd
│ ├── draw_volcano.Rd
│ ├── draw_xt.Rd
│ ├── dt_describe.Rd
│ ├── dt_inspect_types.Rd
│ ├── dt_keybin_reshape.Rd
│ ├── dt_merge.Rd
│ ├── dt_names_by_attr.Rd
│ ├── dt_nunique_perfeat.Rd
│ ├── dt_pctmatch.Rd
│ ├── dt_pctmissing.Rd
│ ├── dt_set_autotypes.Rd
│ ├── dt_set_clean_all.Rd
│ ├── dt_set_cleanfactorlevels.Rd
│ ├── dt_set_logical2factor.Rd
│ ├── dt_set_one_hot.Rd
│ ├── exc.Rd
│ ├── feature_matrix.Rd
│ ├── feature_names.Rd
│ ├── features.Rd
│ ├── get_factor_names.Rd
│ ├── get_mode.Rd
│ ├── get_msg_sink.Rd
│ ├── get_palette.Rd
│ ├── getnames.Rd
│ ├── getnamesandtypes.Rd
│ ├── grapes-BC-grapes.Rd
│ ├── inc.Rd
│ ├── index_col_by_attr.Rd
│ ├── init_project_dir.Rd
│ ├── inspect.Rd
│ ├── inspect_type.Rd
│ ├── is_constant.Rd
│ ├── labelify.Rd
│ ├── massGLM.Rd
│ ├── matchcases.Rd
│ ├── mgetnames.Rd
│ ├── names_by_class.Rd
│ ├── one_hot2factor.Rd
│ ├── outcome.Rd
│ ├── outcome_name.Rd
│ ├── plot.MassGLM.Rd
│ ├── plot_manhattan.Rd
│ ├── plot_roc.Rd
│ ├── plot_true_pred.Rd
│ ├── plot_varimp.Rd
│ ├── preprocess.Rd
│ ├── preprocessed.Rd
│ ├── present.Rd
│ ├── previewcolor.Rd
│ ├── read.Rd
│ ├── read_config.Rd
│ ├── regression_metrics.Rd
│ ├── resample.Rd
│ ├── rnormmat.Rd
│ ├── rtemis-package.Rd
│ ├── rtemis_colors.Rd
│ ├── rtversion.Rd
│ ├── runifmat.Rd
│ ├── set_msg_sink.Rd
│ ├── set_outcome.Rd
│ ├── setdiffsym.Rd
│ ├── setup_CART.Rd
│ ├── setup_CMeans.Rd
│ ├── setup_DBSCAN.Rd
│ ├── setup_ExecutionConfig.Rd
│ ├── setup_GAM.Rd
│ ├── setup_GLM.Rd
│ ├── setup_GLMNET.Rd
│ ├── setup_GridSearch.Rd
│ ├── setup_HardCL.Rd
│ ├── setup_ICA.Rd
│ ├── setup_Isomap.Rd
│ ├── setup_Isotonic.Rd
│ ├── setup_KMeans.Rd
│ ├── setup_LightCART.Rd
│ ├── setup_LightGBM.Rd
│ ├── setup_LightRF.Rd
│ ├── setup_LightRuleFit.Rd
│ ├── setup_LinearSVM.Rd
│ ├── setup_NMF.Rd
│ ├── setup_NeuralGas.Rd
│ ├── setup_PCA.Rd
│ ├── setup_Preprocessor.Rd
│ ├── setup_RadialSVM.Rd
│ ├── setup_Ranger.Rd
│ ├── setup_Resampler.Rd
│ ├── setup_SuperConfig.Rd
│ ├── setup_SuperConfigLive.Rd
│ ├── setup_TabNet.Rd
│ ├── setup_UMAP.Rd
│ ├── setup_tSNE.Rd
│ ├── size.Rd
│ ├── table_column_attr.Rd
│ ├── theme.Rd
│ ├── to_json.Rd
│ ├── train.Rd
│ ├── uniprot_get.Rd
│ ├── with_msg_sink.Rd
│ ├── write_toml.Rd
│ ├── xt_example.Rd
│ └── xtdescribe.Rd
└── tests/
├── testthat/
│ ├── test_Calibration.R
│ ├── test_CheckData.R
│ ├── test_Clustering.R
│ ├── test_Decomposition.R
│ ├── test_ExecutionConfig.R
│ ├── test_Hyperparameters.R
│ ├── test_Metrics.R
│ ├── test_Preprocessor.R
│ ├── test_Resampler.R
│ ├── test_SuperConfig.R
│ ├── test_SuperConfigLive.R
│ ├── test_Supervised.R
│ ├── test_Theme.R
│ ├── test_Tuner.R
│ ├── test_checks.R
│ ├── test_colorsystem.R
│ ├── test_draw.R
│ ├── test_idx.R
│ ├── test_massGLM.R
│ ├── test_msg_sink.R
│ ├── test_strings.R
│ └── test_to_json.R
└── testthat.R
================================================
FILE CONTENTS
================================================
================================================
FILE: .Rbuildignore
================================================
^__dev$
^__validation$
^_pkgdown\.yml$
^.*\.code-workspace$
^.*\.Rcheck$
^.*\.tar\.gz$
^[.]?air[.]toml$
^\.claude$
^\.DS_Store$
^\.gemini$
^\.github$
^\.lintr$
^\.rtms-instructions\.md$
^\.vscode$
^cran-comments\.md$
^LICENSE\.md$
^data-raw$
^dev$
^docs$
^specs$
^AGENTS\.md$
^NEWS\.md$
^pkgdown$
^Makefile$
^SKILL\.md$
================================================
FILE: .github/.gitignore
================================================
*.html
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to rtemis
Thank you for your interest in contributing to **rtemis**! This guide will help you report issues effectively.
## Before Opening an Issue
### Update to Latest Version
Ensure you're using the latest version of rtemis (v0.99+). Many issues may already be fixed in recent updates.
```r
# Install from CRAN
install.packages("rtemis")
# Install from GitHub
pak::pak("rtemis-org/rtemis")
# Install from r-universe
install.packages('rtemis', repos = 'https://rtemis-org.r-universe.dev')
# Check your version
packageVersion("rtemis")
```
### Check Existing Issues
Please search [existing issues](https://github.com/rtemis-org/rtemis/issues) to see if your problem or suggestion has already been reported. If you find a related issue, add a comment with any additional information.
### Review Documentation
- **API Documentation**: https://docs.rtemis.org/r/ml-api/
- **General Documentation**: https://docs.rtemis.org/r/ml
## Opening an Issue
### Issue Types
We welcome the following types of issues:
1. **🐛 Bug Reports**: Unexpected behavior, errors, or crashes. (Use `[BUG]` in the title)
2. **✨ Feature Requests**: Ideas for new functionality. (Use `[FEATURE]` in the title)
3. **📚 Documentation**: Improvements to docs or examples. (Use `[DOC]` in the title)
4. **❓ Questions**: For usage questions, please use [Discussions](https://github.com/rtemis-org/rtemis/discussions) rather than opening an issue.
### Bug Reports
A good bug report should include:
#### Required Information
1. **rtemis version**: Output of `packageVersion("rtemis")`
2. **R version**: Output of `R.version.string`
3. **Operating System**: e.g., macOS 14.5, Ubuntu 22.04, Windows 11
4. **Clear description**: What did you expect vs. what actually happened?
#### Reproducible Example
**Critical**: Provide a minimal reproducible example. Use the template below:
```r
# Load required packages
library(rtemis)
library(data.table) # if needed
# Create minimal data
set.seed(2025)
n <- 100
x <- rnormmat(n, 3)
y <- x[, 1] + x[, 2] + rnorm(n)
dat <- data.frame(x, y)
# Demonstrate the issue
mod <- train(
  x = dat,
  algorithm = "glm"
)
# Expected: Model trains successfully
# Actual: Error message...
```
#### Error Messages
Include **complete error messages** with full stack traces. If the error is verbose, use a code block:
```
Error in train(...):
! You must define either `hyperparameters` or `algorithm`.
```
#### Session Info (for complex issues)
For crashes or environment-specific issues, include:
```r
sessionInfo()
```
### Feature Requests
For feature requests, please describe:
1. **Use case**: What problem would this solve?
2. **Proposed solution**: How should it work?
3. **Alternatives considered**: What workarounds exist currently?
4. **Impact**: Who would benefit from this feature?
**Example:**
> **Use case**: I frequently need to train models with time-series cross-validation but the current resampling methods don't preserve temporal order.
>
> **Proposed solution**: Add `setup_TimeSeriesCV()` that creates train/test splits respecting time ordering.
>
> **Alternatives**: Currently using custom resampling with the `outer_resampling` parameter, but it's verbose and error-prone.
### Documentation Issues
For documentation improvements:
1. **Location**: Specify which page or function (e.g., `?train`, `?setup_GLMNET`)
2. **Problem**: What's unclear, incorrect, or missing?
3. **Suggestion**: How could it be improved?
## Version-Specific Notes
### rtemis 0.99+ vs. rtemisalpha (Legacy)
**Important**: This repository contains **rtemis 0.99+**, a complete rewrite using S7 classes. If you're using the legacy version (`rtemisalpha`), please note:
- Legacy issues should reference [rtemis-legacy](https://github.com/rtemis-org/rtemis-legacy) (unmaintained)
- Migration questions are welcome here
- API differences are expected (see README.md for major changes)
### Active Development
rtemis 0.99+ is under active development. Features may change between releases. When reporting issues:
- Specify your branch if not using `main` (check with `git branch`)
- Note if the issue appears in a specific algorithm (some are being ported from the legacy version)
## What Happens Next?
1. **Triage**: Maintainers will review and label your issue
2. **Discussion**: We may ask for clarification or additional details
3. **Resolution**:
   - **Bugs**: Fixed in upcoming releases, referenced in commit messages
   - **Features**: Evaluated for inclusion in roadmap
   - **Questions**: Answered or redirected to appropriate resources
## Code of Conduct
Be respectful and constructive. We're all here to improve rtemis together.
## Pull Requests
While this guide focuses on issues, pull requests are welcome! Key points:
- Discuss major changes in an issue first
- Follow existing code style (S7 classes, roxygen2 documentation)
- Every `@param` tag must follow the format: `Class: Description ending with period.`
- Include tests for new functionality
- Update documentation as needed
## Questions?
- **General usage**: [GitHub Discussions](https://github.com/rtemis-org/rtemis/discussions)
- **Bug reports/features**: [GitHub Issues](https://github.com/rtemis-org/rtemis/issues)
- **Security issues**: Contact maintainers directly (see DESCRIPTION file)
---
Thank you for contributing to rtemis.
================================================
FILE: .github/workflows/R-CMD-check.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  pull_request:
  push:
    branches: [main]
  workflow_dispatch:
name: R-CMD-check
permissions: read-all
concurrency:
  group: R-CMD-check-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  R-CMD-check:
    runs-on: ubuntu-latest
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-pandoc@v2
      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check
      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true
          build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")'
          error-on: '"note"'
================================================
FILE: .gitignore
================================================
# Dev
__dev/
__validation/
dev/
__out/
specs/
# Mac OS
.DS_Store
# VS Code
.vscode
*.code-workspace
# R History files
.Rhistory
.Rapp.history
# Output files from R CMD build
/*.tar.gz
# Output files from R CMD check
/*.Rcheck/
# pkgdown
_pkgdown.yml
pkgdown/
# Air
air.toml
# produced vignettes
vignettes/*.html
vignettes/*.pdf
# Temporary files created by R markdown
*.utf8.md
*.knit.md
# lintr
.lintr
# CRAN
cran-comments.md
# Manual
*.pdf
# Assistants
AGENTS.md
.claude/
SKILL.md
================================================
FILE: DESCRIPTION
================================================
Package: rtemis
Version: 1.2.0
Title: Machine Learning and Visualization
Date: 2026-05-12
Authors@R: person(given = "E.D.", family = "Gennatas", role = c("aut", "cre", "cph"),
email = "gennatas@gmail.com", comment = c(ORCID = "0000-0001-9280-3609"))
Description: Machine learning and visualization package with an 'S7' backend
featuring comprehensive type checking and validation, paired with an efficient functional
user-facing API. train(), cluster(), and decomp() provide one-call access to supervised and
unsupervised learning. All configuration steps are performed using setup functions and
validated. A single call to train() handles preprocessing, hyperparameter tuning, and testing
with nested resampling. Supports 'data.frame', 'data.table', and 'tibble' inputs, parallel
execution, and interactive visualizations. The package first appeared in E.D. Gennatas (2017)
<https://repository.upenn.edu/entities/publication/d81892ea-3087-4b71-a6f5-739c58626d64>.
License: GPL (>= 3)
URL: https://www.rtemis.org, https://docs.rtemis.org/r/ml, https://docs.rtemis.org/r/ml-api/
BugReports: https://github.com/rtemis-org/rtemis/issues
ByteCompile: yes
Depends:
R (>= 4.1.0)
Imports:
grDevices,
graphics,
stats,
methods,
utils,
S7,
data.table,
future,
htmltools,
cli
Suggests:
arrow,
bit64,
car,
colorspace,
DBI,
dbscan,
dendextend (>= 0.18.0),
duckdb,
e1071,
farff,
fastICA,
flexclust,
future.apply,
future.mirai,
futurize,
geosphere,
ggplot2,
glmnet,
geojsonio,
glue,
grid,
gsubfn,
haven,
heatmaply,
htmlwidgets,
igraph,
jsonlite,
later,
leaflet,
leaps,
lightAUC,
lightgbm,
matrixStats,
mgcv,
mice,
mirai,
missRanger,
nanonext,
networkD3,
NMF,
openxlsx,
parallelly,
partykit,
plotly,
plumber,
pROC,
progressr,
psych,
pvclust,
ranger,
reactable,
readxl,
reticulate,
ROCR,
rpart,
Rtsne,
seqinr,
sf,
shapr,
survival,
tabnet,
threejs,
testthat (>= 3.0.0),
tibble,
timeDate,
toml,
torch,
uwot,
vegan,
vroom,
withr
Encoding: UTF-8
Config/testthat/edition: 3
Roxygen: list(markdown = TRUE)
LazyData: true
Config/roxygen2/version: 8.0.0
================================================
FILE: LICENSE.md
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: Makefile
================================================
# Package name: second field of the first "Package:" line in DESCRIPTION.
PKG := $(shell awk '/^Package:/{print $$2; exit}' DESCRIPTION)
# R and Rscript executables; `?=` keeps any value already set by the caller or environment.
R ?= R
RSCRIPT ?= Rscript
# Output directory created by `R CMD check`.
CHECK_DIR := $(PKG).Rcheck
# Shell glob matching the source tarball(s) produced by `R CMD build`.
TARBALL_GLOB := $(PKG)_*.tar.gz
# msg: recipe helper, used as `$(call msg,text)`. Prints "[UTC timestamp] text" wrapped in an
# ANSI 24-bit color escape; the leading `@` stops make from echoing the printf command itself.
msg = @printf '\033[38;2;108;163;160m[%s] %s\033[0m\n' "$$(date -u '+%Y-%m-%d %H:%M:%SZ')" "$(1)"
# Running `make` with no arguments prints the help text.
.DEFAULT_GOAL := help
# None of the targets in this Makefile produce a file named after themselves, so all of them
# are declared phony. `manual` is included so that a stray file or directory called "manual"
# can never cause make to consider that target up to date.
.PHONY: help format document install test build check check-cran check-cran-no-tests manual site clean
# help: print the list of available targets, one per line, with a short description.
help:
$(call msg,Available targets:)
@printf '%s\n' \
' format Format R code with air CLI (if available)' \
' document Generate roxygen2 documentation' \
' install Document and install the package locally with pak' \
' test Run testthat::test_local(stop_on_failure = TRUE)' \
' build Build the source tarball' \
' check Run R CMD check on the built tarball' \
' check-cran Run R CMD check --as-cran' \
' check-cran-no-tests Run R CMD check --as-cran --no-tests' \
' manual Build package manual' \
' site Build pkgdown site' \
' clean Remove tarballs and .Rcheck output'
# format: run `air format .` when the `air` CLI is found on PATH; otherwise print a note
# and continue without failing, so targets that depend on `format` still run.
format:
$(call msg,─── Formatting $(PKG) package... ───)
@if command -v air >/dev/null 2>&1; then \
air format .; \
else \
echo " Note: 'air' CLI not found — skipping R code formatting."; \
fi
$(call msg,Done)
# document: after `format`, regenerate documentation by calling roxygen2::roxygenize().
document: format
$(call msg,─── Documenting $(PKG) package... ───)
$(RSCRIPT) -e "roxygen2::roxygenize()"
$(call msg,Done)
# install: after `document`, install the package from the working directory with
# pak::local_install(upgrade = TRUE).
install: document
$(call msg,─── Installing $(PKG) package... ───)
$(RSCRIPT) -e "pak::local_install(upgrade = TRUE)"
$(call msg,Done)
# test: run the package tests with testthat::test_local(stop_on_failure = TRUE).
test:
$(call msg,─── Running testthat tests for $(PKG)... ───)
$(RSCRIPT) -e "testthat::test_local(stop_on_failure = TRUE)"
$(call msg,Done)
# build: after `clean` (which removes earlier tarballs and check output), run
# `R CMD build .` in the current directory to create the source tarball.
build: clean
$(call msg,─── Building $(PKG) package... ───)
$(R) CMD build .
$(call msg,Done)
# check: after `build`, run `R CMD check` on the tarball matched by TARBALL_GLOB, then delete
# the tarball. make stops a recipe at the first failing command, so the tarball is only
# removed when the check succeeds.
check: build
$(call msg,─── Running R CMD check on $(PKG)... ───)
$(R) CMD check $(TARBALL_GLOB)
rm -f $(TARBALL_GLOB)
$(call msg,Done)
# check-cran: same as `check`, but passes --as-cran to `R CMD check`.
check-cran: build
$(call msg,─── Running R CMD check --as-cran on $(PKG)... ───)
$(R) CMD check $(TARBALL_GLOB) --as-cran
rm -f $(TARBALL_GLOB)
$(call msg,Done)
# check-cran-no-tests: after `build`, run `R CMD check --as-cran --no-tests` on the tarball
# matched by TARBALL_GLOB, then delete the tarball. The progress message names the exact
# flags so that log output can be told apart from a plain `check-cran` run.
check-cran-no-tests: build
	$(call msg,─── Running R CMD check --as-cran --no-tests on $(PKG)... ───)
	$(R) CMD check $(TARBALL_GLOB) --as-cran --no-tests
	rm -f $(TARBALL_GLOB)
	$(call msg,Done)
# manual: build the package manual with `R CMD Rd2pdf`, written to $(PKG).pdf.
manual:
$(call msg,─── Building manual for $(PKG)... ───)
$(R) CMD Rd2pdf . --output=$(PKG).pdf
$(call msg,Done)
# site: build the pkgdown website by calling pkgdown::build_site().
site:
$(call msg,─── Building pkgdown site for $(PKG)... ───)
$(RSCRIPT) -e "pkgdown::build_site()"
$(call msg,Done)
# clean: delete the `R CMD check` output directory and any built source tarballs.
# `rm -rf` / `rm -f` do not fail when nothing matches.
clean:
$(call msg,─── Cleaning build artifacts... ───)
rm -rf $(CHECK_DIR)
rm -f $(TARBALL_GLOB)
$(call msg,Done)
================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand
S3method(plot,MassGLM)
export("%BC%")
export(.list_to_Hyperparameters)
export(.list_to_ResamplerConfig)
export(.list_to_TunerConfig)
export(available_clustering)
export(available_decomposition)
export(available_draw)
export(available_supervised)
export(available_themes)
export(calibrate)
export(check_data)
export(choose_theme)
export(class_imbalance)
export(classification_metrics)
export(clean_colnames)
export(clean_names)
export(cluster)
export(col2grayscale)
export(color_adjust)
export(ddSci)
export(ddb_collect)
export(ddb_data)
export(decomp)
export(describe)
export(df_movecolumn)
export(df_nunique_perfeat)
export(draw_3Dscatter)
export(draw_bar)
export(draw_box)
export(draw_calibration)
export(draw_confusion)
export(draw_dist)
export(draw_fit)
export(draw_graphD3)
export(draw_graphjs)
export(draw_heatmap)
export(draw_leaflet)
export(draw_pie)
export(draw_protein)
export(draw_pvals)
export(draw_roc)
export(draw_scatter)
export(draw_spectrogram)
export(draw_survfit)
export(draw_table)
export(draw_ts)
export(draw_varimp)
export(draw_volcano)
export(draw_xt)
export(dt_describe)
export(dt_inspect_types)
export(dt_keybin_reshape)
export(dt_merge)
export(dt_names_by_attr)
export(dt_nunique_perfeat)
export(dt_pctmatch)
export(dt_pctmissing)
export(dt_set_autotypes)
export(dt_set_clean_all)
export(dt_set_cleanfactorlevels)
export(dt_set_logical2factor)
export(dt_set_one_hot)
export(exc)
export(feature_matrix)
export(feature_names)
export(features)
export(get_factor_names)
export(get_mode)
export(get_msg_sink)
export(get_palette)
export(getcharacternames)
export(getdatenames)
export(getfactornames)
export(getlogicalnames)
export(getnames)
export(getnamesandtypes)
export(getnumericnames)
export(inc)
export(index_col_by_attr)
export(init_project_dir)
export(inspect)
export(inspect_type)
export(is_constant)
export(labelify)
export(massGLM)
export(matchcases)
export(mgetnames)
export(names_by_class)
export(one_hot2factor)
export(outcome)
export(outcome_name)
export(plot_manhattan)
export(plot_manhattan.MassGLM)
export(plot_roc)
export(plot_true_pred)
export(plot_varimp)
export(preprocess)
export(preprocess.class_tabular.Preprocessor)
export(preprocess.class_tabular.PreprocessorConfig)
export(preprocessed)
export(present)
export(previewcolor)
export(read)
export(read_config)
export(regression_metrics)
export(resample)
export(rnormmat)
export(rtemis_colors)
export(rtversion)
export(runifmat)
export(set_msg_sink)
export(set_outcome)
export(setdiffsym)
export(setup_CART)
export(setup_CMeans)
export(setup_DBSCAN)
export(setup_ExecutionConfig)
export(setup_GAM)
export(setup_GLM)
export(setup_GLMNET)
export(setup_GridSearch)
export(setup_HardCL)
export(setup_ICA)
export(setup_Isomap)
export(setup_Isotonic)
export(setup_KMeans)
export(setup_LightCART)
export(setup_LightGBM)
export(setup_LightRF)
export(setup_LightRuleFit)
export(setup_LinearSVM)
export(setup_NMF)
export(setup_NeuralGas)
export(setup_PCA)
export(setup_Preprocessor)
export(setup_RadialSVM)
export(setup_Ranger)
export(setup_Resampler)
export(setup_SuperConfig)
export(setup_SuperConfigLive)
export(setup_TabNet)
export(setup_UMAP)
export(setup_tSNE)
export(size)
export(table_column_attr)
export(theme_black)
export(theme_blackgrid)
export(theme_blackigrid)
export(theme_darkgray)
export(theme_darkgraygrid)
export(theme_darkgrayigrid)
export(theme_lightgraygrid)
export(theme_mediumgraygrid)
export(theme_white)
export(theme_whitegrid)
export(theme_whiteigrid)
export(to_json)
export(train)
export(uniprot_get)
export(with_msg_sink)
export(write_toml)
export(xtdescribe)
import(S7)
import(data.table)
import(grDevices)
import(graphics)
import(htmltools)
import(methods)
import(stats)
importFrom(utils,getFromNamespace)
importFrom(utils,head)
importFrom(utils,packageVersion)
importFrom(utils,sessionInfo)
importFrom(utils,tail)
================================================
FILE: NEWS.md
================================================
# rtemis news
## 1.0.0 First CRAN release
## 1.0.1
- Introduce `VariableImportance` S7 class to represent variable importance data, allowing for more than one measure of importance per model, and update all relevant classes and methods accordingly.
- Calculate Partial_Effect_Variance as a variable importance measure for GAM models.
- Add `execution_config` argument to internal `train_` method and use it in LightRuleFit to propagate to LightGBM and GLMNET calls.
================================================
FILE: R/00_S7init.R
================================================
# 00_S7init.R
# ::rtemis::
# 2025- EDG rtemis.org
# References
# S7 generics: https://rconsortium.github.io/S7/articles/generics-methods.html
# %% --- S3 Classes for S7 ----------------------------------------------------------------------------
# S3 classes wrapped for use in S7 signatures
class_data.table <- new_S3_class(class = "data.table")
class_lgb.Booster <- new_S3_class(class = "lgb.Booster")

# Tabular input union: data.frame or data.table.
# All internal methods should support data.frame, data.table, tbl_df
class_tabular <- new_union(class_data.frame, class_data.table)

# S3 classes of fitted supervised learning models
class_glm <- new_S3_class(class = "glm")
class_gam <- new_S3_class(class = "gam")
class_glmnet <- new_S3_class(class = "glmnet")
class_cv.glmnet <- new_S3_class(class = "cv.glmnet")
class_stepfun <- new_S3_class(class = "stepfun") # Isotonic regression
class_rpart <- new_S3_class(class = "rpart")
class_ranger <- new_S3_class(class = "ranger")
class_svm <- new_S3_class(class = "svm")
class_tabnet_fit <- new_S3_class(class = "tabnet_fit")
# %% --- Generics -------------------------------------------------------------------------------------
# %% repr ----
#' String representation
#'
#' Internal S7 generic, dispatching on `x`.
#'
#' @param x rtemis object.
#'
#' @return Character string representation of the object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
repr <- new_generic(name = "repr", dispatch_args = "x")
# %% inspect ----
#' Inspect rtemis object
#'
#' @param x R object to inspect.
#'
#' @return Character string, invisibly. Called for the side effect of printing information to
#' the console.
#'
#' @author EDG
#' @export
#'
#' @examples
#' inspect(iris)
inspect <- new_generic(
  name = "inspect",
  dispatch_args = "x",
  fun = function(x) {
    S7_dispatch()
  }
) # /rtemis::inspect
# %% preprocess ----
#' @name preprocess
#'
#' @title Preprocess Data
#'
#' @description
#' Preprocess data for analysis and visualization.
#'
#' @details
#' Two kinds of methods are provided: one for preprocessing training set data, which accepts a
#' `PreprocessorConfig` object, and one for preprocessing validation and test set data, which
#' accepts a `Preprocessor` object.
#'
#' @return `Preprocessor` object.
#'
#' @author EDG
#' @rdname preprocess
#' @export
#'
#' @examples
#' # Setup a `Preprocessor`: this outputs a `PreprocessorConfig` object.
#' prp <- setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE)
#'
#' # Includes a long list of parameters
#' prp
#'
#' # Resample iris to get train and test data
#' res <- resample(iris, setup_Resampler(seed = 2026))
#' iris_train <- iris[res[[1]], ]
#' iris_test <- iris[-res[[1]], ]
#'
#' # Preprocess training data
#' iris_pre <- preprocess(iris_train, prp)
#'
#' # Access preprocessed training data with `preprocessed()`
#' preprocessed(iris_pre)
#'
#' # Apply the same preprocessing to test data
#' # In this case, the scale and center values from training data will be used.
#' # Note how `preprocess()` accepts either a `PreprocessorConfig` or `Preprocessor` object for
#' # this reason.
#' iris_test_pre <- preprocess(iris_test, iris_pre)
#'
#' # Access preprocessed test data
#' preprocessed(iris_test_pre)
preprocess <- new_generic(
  name = "preprocess",
  dispatch_args = c("x", "config")
)
# %% train_ ----
#' Generic for training supervised learning models
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific training based on
#' `Hyperparameters` class. Called by `train()`.
#'
#' @param hyperparameters `Hyperparameters` object: Algorithm-specific hyperparameters.
#' @param x tabular data: Training set.
#' @param weights Optional Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Validation set for algorithms that support early stopping.
#' @param execution_config Execution configuration, as returned by `setup_ExecutionConfig()`.
#' Defaults to a fresh `setup_ExecutionConfig()` call.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Algorithm-specific fitted model object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
train_ <- new_generic(
  "train_",
  "hyperparameters",
  function(
    hyperparameters,
    x,
    weights = NULL,
    dat_validation = NULL,
    execution_config = setup_ExecutionConfig(),
    verbosity = 1L
  ) {
    S7_dispatch()
  }
) # /rtemis::train_
# %% predict_super ----
#' Predict from supervised learning model (internal)
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific prediction based on
#' model class.
#'
#' @param model Fitted model object.
#' @param newdata tabular data: New data for prediction.
#' @param type Optional Character: Type of supervised learning ("Classification" or "Regression").
#' @param verbosity Integer: Verbosity level.
#'
#' @return Predictions (class probabilities for classification, numeric for regression).
#'
#' @author EDG
#' @keywords internal
#' @noRd
predict_super <- new_generic(
  "predict_super",
  "model",
  function(model, newdata, type = NULL, verbosity = 0L) {
    S7_dispatch()
  }
) # /rtemis::predict_super
# %% varimp_super ----
#' Get variable importance (internal)
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific variable importance
#' extraction based on model class.
#'
#' @param model Fitted model object.
#' @param ... Additional arguments passed to methods.
#'
#' @return Numeric vector of variable importance scores (named by feature).
#'
#' @author EDG
#' @keywords internal
#' @noRd
varimp_super <- new_generic(
  "varimp_super",
  "model",
  function(model, ...) {
    S7_dispatch()
  }
) # /rtemis::varimp_super
# %% se_super ----
#' Standard errors of predictions (internal generic)
#'
#' @description
#' Internal S7 generic for extracting standard errors from regression models.
#' Dispatch is on the class of `model`.
#'
#' @param model Fitted model object.
#' @param newdata tabular data: New data for prediction.
#'
#' @return Numeric vector of standard errors.
#'
#' @author EDG
#' @keywords internal
#' @noRd
se_super <- new_generic(
  name = "se_super",
  dispatch_args = "model",
  fun = function(model, newdata) {
    S7_dispatch()
  }
)
# %% se ----
# S7 generic for the standard error of the fit; dispatches on `x`.
se <- new_generic(name = "se", dispatch_args = "x")
# %% decomp_ ----
#' Decomposition (internal generic)
#'
#' Internal S7 generic dispatching on the class of `config`.
#'
#' @param config Configuration object; its class selects the method.
#' @param x Input data.
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#' @keywords internal
#' @noRd
decomp_ <- new_generic(
  name = "decomp_",
  dispatch_args = "config",
  fun = function(config, x, verbosity = 1L) S7_dispatch()
) # /rtemis::decomp_
# %% cluster_ ----
#' Clustering (internal generic)
#'
#' Internal S7 generic dispatching on the class of `config`.
#'
#' @param config Configuration object; its class selects the method.
#' @param x Input data.
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#' @keywords internal
#' @noRd
cluster_ <- new_generic(
  name = "cluster_",
  dispatch_args = "config",
  fun = function(config, x, verbosity = 1L) S7_dispatch()
) # /rtemis::cluster_
# %% desc ----
#' Short, single-line description of an object for inline printing.
#' This is like `repr`, but for single-line descriptions.
#'
#' @author EDG
#' @keywords internal
#' @noRd
desc <- new_generic(name = "desc", dispatch_args = "x")
# %% get_metric ----
#' Get metric (internal generic, dispatches on `x`)
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_metric <- new_generic(name = "get_metric", dispatch_args = "x")
# %% validate_hyperparameters ----
#' Check hyperparameters against the training data
#'
#' Internal S7 generic dispatching on the class of `x`.
#'
#' @param x tabular data: Training data.
#' @param hyperparameters `Hyperparameters` to check.
#'
#' @author EDG
#' @keywords internal
#' @noRd
validate_hyperparameters <- new_generic(
  name = "validate_hyperparameters",
  dispatch_args = "x",
  fun = function(x, hyperparameters) S7_dispatch()
) # /rtemis::validate_hyperparameters
# %% plot_metric ----
#' Plot Metric
#'
#' @description
#' Plot a metric for `SupervisedRes` objects.
#'
#' @param x `SupervisedRes` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return plotly object
#'
#' @author EDG
#' @keywords internal
#' @noRd
plot_metric <- new_generic(name = "plot_metric", dispatch_args = "x")
# %% plot_roc ----
#' Plot ROC curve
#'
#' @description
#' Generic used to plot the ROC curve for a model.
#'
#' @param x `Classification` or `ClassificationRes` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return A plotly object containing the ROC curve.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ir <- iris[51:150, ]
#' ir[["Species"]] <- factor(ir[["Species"]])
#' species_glm <- train(ir, algorithm = "GLM")
#' plot_roc(species_glm)
plot_roc <- new_generic(name = "plot_roc", dispatch_args = "x")
# %% plot_varimp ----
#' Plot Variable Importance
#'
#' @description
#' Plot variable importance for Supervised objects.
#'
#' @param x `Supervised` or `SupervisedRes` object.
#' @param ... Additional arguments passed to methods.
#'
#' @details
#' This method calls [draw_varimp] internally.
#' The `plot_top` argument controls how many features are shown:
#' - an integer plots that many top features;
#' - a number between 0 and 1 plots that fraction of top features.
#'
#' @return plotly object or invisible NULL if no variable importance is available.
#'
#' @author EDG
#' @export
#'
#' @seealso [draw_varimp], which is called by this method
#'
#' @examplesIf interactive()
#' ir <- set_outcome(iris, "Sepal.Length")
#' seplen_cart <- train(ir, algorithm = "CART")
#' plot_varimp(seplen_cart)
#' # Plot horizontally
#' plot_varimp(seplen_cart, orientation = "h")
#' plot_varimp(seplen_cart, orientation = "h", plot_top = 3L)
#' plot_varimp(seplen_cart, orientation = "h", plot_top = 0.5)
plot_varimp <- new_generic(name = "plot_varimp", dispatch_args = "x")
# %% plot_true_pred ----
#' Plot True vs. Predicted Values
#'
#' @description
#' Plot true vs. predicted values for Supervised objects:
#' a confusion matrix for classification, and a scatter plot of true vs. predicted
#' values for regression.
#'
#' @param x `Supervised` or `SupervisedRes` object.
#' @param ... Additional arguments passed to methods.
#'
#' @return plotly object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- set_outcome(iris, "Sepal.Length")
#' sepallength_glm <- train(x, algorithm = "GLM")
#' plot_true_pred(sepallength_glm)
plot_true_pred <- new_generic(name = "plot_true_pred", dispatch_args = "x")
# %% plot_manhattan ----
#' Manhattan plot
#'
#' @description
#' Draw a Manhattan plot for a `MassGLM` object created with [massGLM].
#'
#' @param x `MassGLM` object.
#' @param ... Additional arguments passed to methods.
#'
#' @return plotly object.
#'
#' @author EDG
#' @export
# An example is included in the `plot_manhattan.MassGLM` method.
plot_manhattan <- new_generic(name = "plot_manhattan", dispatch_args = "x")
# %% describe ----
#' Describe object
#'
#' @param x R object to describe. See method documentation for supported classes.
#' @param ... Additional arguments passed to methods. See details.
#'
#' @details
#' The `factor` method accepts these extra arguments:
#' - `max_n`: Integer: Return counts for up to this many levels.
#' - `return_ordered`: Logical: If TRUE, return levels ordered by count, otherwise return in level order.
#' - `verbosity`: Integer: Verbosity level.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # --- For `Supervised` objects ---
#' species_lightrf <- train(iris, algorithm = "lightrf")
#' describe(species_lightrf)
#'
#' # --- For `SupervisedRes` objects ---
#' mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler())
#' describe(mod)
#'
#' # --- For factors ---
#' # Small number of levels
#' describe(iris[["Species"]])
#'
#' # Large number of levels: show top n by count
#' x <- factor(sample(letters, 1000, TRUE))
#' describe(x)
#' describe(x, 3)
#' describe(x, 3, return_ordered = FALSE)
describe <- new_generic(name = "describe", dispatch_args = "x")
# %% present ----
#' Present rtemis object
#'
#' @description
#' Generic used to present an rtemis object by printing to console and drawing plots.
#'
#' @param x `Supervised` or `SupervisedRes` object or list of such objects.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return A plotly object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' ir <- set_outcome(iris, "Sepal.Length")
#' seplen_lightrf <- train(ir, algorithm = "lightrf")
#' present(seplen_lightrf)
present <- new_generic(name = "present", dispatch_args = "x")
# %% get_hyperparams_need_tuning ----
#' Get the hyperparameters that need tuning (internal generic, dispatches on `x`).
#'
#' @return Character vector of hyperparameter names that need tuning.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_hyperparams_need_tuning <- new_generic(
  name = "get_hyperparams_need_tuning",
  dispatch_args = "x"
)
# %% get_hyperparams ----
#' Get hyperparameters (internal generic, dispatches on both `x` and `param_names`).
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_hyperparams <- new_generic(
  name = "get_hyperparams",
  dispatch_args = c("x", "param_names")
)
# %% extract_rules ----
#' Extract rules from a model (internal generic, dispatches on `x`).
#'
#' @author EDG
#' @keywords internal
#' @noRd
extract_rules <- new_generic(name = "extract_rules", dispatch_args = "x")
# %% get_factor_levels ----
#' @name get_factor_levels
#'
#' @title
#' Get factor levels from data.frame or similar
#'
#' @usage
#' get_factor_levels(x)
#'
#' @param x tabular data.
#'
#' @return Named list of factor levels. Names correspond to column names.
#' An empty list is returned when `x` has no factor columns.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_factor_levels <- new_generic(
  "get_factor_levels",
  "x",
  function(x) S7_dispatch()
)
method(get_factor_levels, class_data.frame) <- function(x) {
  # vapply() always returns a logical vector. sapply() returns `list()` for
  # zero-column input, which makes which() fail.
  factor_index <- which(vapply(x, is.factor, logical(1)))
  lapply(x[, factor_index, drop = FALSE], levels)
}
method(get_factor_levels, class_data.table) <- function(x) {
  # Same type-stable column test as the data.frame method.
  factor_index <- which(vapply(x, is.factor, logical(1)))
  lapply(x[, factor_index, with = FALSE], levels)
}
# %% to_html ----
#' Convert to HTML (internal generic, dispatches on `x`)
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_html <- new_generic(name = "to_html", dispatch_args = "x")
# %% to_toml ----
#' Convert to TOML (internal generic, dispatches on `x`)
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_toml <- new_generic(name = "to_toml", dispatch_args = "x")
# %% to_yaml ----
#' Convert to YAML (internal generic, dispatches on `x`)
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_yaml <- new_generic(name = "to_yaml", dispatch_args = "x")
# %% to_json ----
#' Convert to JSON-serializable list
#'
#' Convert an rtemis S7 object to a named list suitable for
#' `jsonlite::toJSON(auto_unbox = TRUE)`. The rtemislive backend uses this
#' to send structured results to the browser frontend without scraping
#' R console output.
#'
#' Every output list carries a `.class` field equal to the most specific
#' S7 class name, so the frontend can dispatch to a class-specific
#' renderer.
#'
#' The default method walks `props(x)`: S7-typed properties are converted
#' recursively and primitive properties are passed through as-is. Per-class
#' methods override the default where it isn't appropriate (e.g. classes whose
#' props include a `data.table`, an opaque model fit, or where some props
#' should be excluded for size or relevance reasons).
#'
#' @param x rtemis S7 object.
#' @param ... Additional arguments passed to method.
#'
#' @return Named list. Pass through `jsonlite::toJSON(auto_unbox = TRUE)`
#' for serialization.
#'
#' @author EDG
#' @keywords internal
#' @export
to_json <- new_generic(name = "to_json", dispatch_args = "x")
# %% to_json default ----
#' @name to_json
#' @keywords internal
#' @noRd
# Default method: convert every property with `.to_json_value()` and prepend
# the object's S7 class name as `.class`.
method(to_json, S7_object) <- function(x, ...) {
  class_name <- S7_class(x)@name
  converted <- lapply(props(x), .to_json_value)
  c(list(.class = class_name), converted)
} # /rtemis::to_json.S7_object
#' Recursively convert a value to a JSON-serializable form
#'
#' Covers the composite shapes met when walking S7 props:
#' - `NULL` is returned as `NULL`;
#' - S7 objects are converted through the `to_json` generic;
#' - lists other than data.frames are converted element-wise, since they may
#'   *contain* S7 objects;
#' - everything else (primitives, data.frames) is passed through unchanged,
#'   as jsonlite supports them natively.
#'
#' @param v Value from an S7 property.
#'
#' @return JSON-serializable value.
#'
#' @author EDG
#' @keywords internal
#' @noRd
.to_json_value <- function(v) {
  if (is.null(v)) {
    NULL
  } else if (S7_inherits(v)) {
    to_json(v)
  } else if (is.list(v) && !is.data.frame(v)) {
    # data.frame / data.table are list-like but must not be walked column by column.
    lapply(v, .to_json_value)
  } else {
    v
  }
} # /rtemis::.to_json_value
# %% write_toml ----
#' @name
#' write_toml
#'
#' @title
#' Write to TOML file
#'
#' @author EDG
#' @export
# Examples are included in the method documentation.
write_toml <- new_generic(
  name = "write_toml",
  dispatch_args = "x",
  fun = function(x, file, overwrite = FALSE, verbosity = 1L) {
    S7_dispatch()
  }
) # /rtemis::write_toml
# %% inc ----
#' Select (include) columns by character or numeric vector.
#'
#' S7 generic dispatching on the class of `x`.
#'
#' @param x tabular data.
#' @param idx Character or numeric vector: Column names or indices to include.
#'
#' @return data.frame, tibble, or data.table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' inc(iris, c(3, 4)) |> head()
#' inc(iris, c("Sepal.Length", "Species")) |> head()
inc <- new_generic(
  name = "inc",
  dispatch_args = "x",
  fun = function(x, idx) S7_dispatch()
)
# %% exc ----
#' Exclude columns by character or numeric vector.
#'
#' S7 generic dispatching on the classes of both `x` and `idx`.
#'
#' @param x tabular data.
#' @param idx Character or numeric vector: Column names or indices to exclude.
#'
#' @return data.frame, tibble, or data.table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' exc(iris, "Species") |> head()
#' exc(iris, c(1, 3)) |> head()
exc <- new_generic(
  name = "exc",
  dispatch_args = c("x", "idx"),
  fun = function(x, idx) S7_dispatch()
)
# inc() for data.frame: select columns `idx`; `drop = FALSE` keeps the result
# tabular even when a single column is selected.
method(inc, class_data.frame) <- function(x, idx) {
x[, idx, drop = FALSE]
}
# inc() for data.table: select columns `idx` through `.SD` / `.SDcols`.
method(inc, class_data.table) <- function(x, idx) {
x[, .SD, .SDcols = idx]
}
# exc() for data.frame with column names: drop every column whose name is in `idx`.
# A logical mask is used instead of `-which(...)`: when no name matches,
# `-which(...)` is `integer(0)`, which selects zero columns instead of keeping all.
method(exc, list(class_data.frame, class_character)) <- function(x, idx) {
  x[, !(names(x) %in% idx), drop = FALSE]
}
# exc() for data.frame with integer positions: drop the columns at positions `idx`.
method(exc, list(class_data.frame, class_integer)) <- function(x, idx) {
  # Nothing to exclude: `-integer(0)` is `integer(0)`, which would select zero
  # columns rather than keep them all.
  if (length(idx) == 0L) {
    return(x)
  }
  x[, -idx, drop = FALSE]
}
# exc() for data.frame with double positions: convert `idx` with `clean_int()`,
# then drop the columns at those positions.
method(exc, list(class_data.frame, class_double)) <- function(x, idx) {
  idx <- clean_int(idx)
  # Nothing to exclude: a negated empty index would select zero columns
  # rather than keep them all.
  if (length(idx) == 0L) {
    return(x)
  }
  x[, -idx, drop = FALSE]
}
# exc() for data.table with character or integer `idx`.
# NOTE(review): relies on data.table interpreting a leading `-` in `.SDcols` as
# "exclude these columns" (also for character `idx`) — confirm against the
# data.table documentation before restructuring this expression.
method(
exc,
list(class_data.table, class_character | class_integer)
) <- function(x, idx) {
x[, .SD, .SDcols = -idx]
}
# exc() for data.table with double `idx`: convert with `clean_int()` first,
# then exclude as above.
method(exc, list(class_data.table, class_double)) <- function(x, idx) {
idx <- clean_int(idx)
x[, .SD, .SDcols = -idx]
}
# %% outcome_name ----
#' Get the name of the last column
#'
#' @details
#' This applies to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Name of the last column.
#'
#' @author EDG
#' @export
#'
#' @examples
#' outcome_name(iris)
outcome_name <- new_generic(
  name = "outcome_name",
  dispatch_args = "x",
  fun = function(x) S7_dispatch()
)
method(outcome_name, class_data.frame) <- function(x) {
  column_names <- names(x)
  column_names[NCOL(x)]
} # /rtemis::outcome_name
# %% outcome ----
#' Get the outcome as a vector
#'
#' Returns the last column of `x`, which is by convention the outcome variable.
#'
#' @details
#' This applies to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Vector containing the last column of `x`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' outcome(iris)
outcome <- new_generic(
  name = "outcome",
  dispatch_args = "x",
  fun = function(x) S7_dispatch()
) # /rtemis::outcome
method(outcome, class_data.frame) <- function(x) {
  last_col <- NCOL(x)
  x[[last_col]]
}
# %% features ----
#' Get features from tabular data
#'
#' Returns all columns except the last one.
#'
#' @details
#' This can be applied to tabular datasets used for supervised learning in \pkg{rtemis},
#' where, by convention, the last column is the outcome variable and all other columns
#' are features. Input with fewer than 2 columns is rejected with an error.
#'
#' @param x tabular data: Input data to get features from.
#'
#' @return Object of the same class as the input, after removing the last column.
#'
#' @author EDG
#' @export
#'
#' @examples
#' features(iris) |> head()
features <- new_generic(
  name = "features",
  dispatch_args = "x",
  fun = function(x) S7_dispatch()
) # /rtemis::features
method(features, class_data.frame) <- function(x) {
  n_cols <- NCOL(x)
  if (n_cols < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  x[, -n_cols, drop = FALSE]
}
method(features, class_data.table) <- function(x) {
  n_cols <- NCOL(x)
  if (n_cols < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  x[, -n_cols, with = FALSE]
} # /rtemis::features.class_data.table
# %% feature_names ----
#' Get feature names
#'
#' Returns all column names except the last one.
#'
#' @details
#' This applies to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features. Input with fewer than 2 columns is rejected with an error.
#'
#' @param x tabular data.
#'
#' @return Character vector of feature names.
#'
#' @author EDG
#' @export
#'
#' @examples
#' feature_names(iris)
feature_names <- new_generic(
  name = "feature_names",
  dispatch_args = "x",
  fun = function(x) S7_dispatch()
) # /rtemis::feature_names
method(feature_names, class_data.frame) <- function(x) {
  n_cols <- NCOL(x)
  if (n_cols < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  names(x)[-n_cols]
} # /rtemis::feature_names.class_data.frame
# %% check_factor_levels ----
#' Check factor levels (internal generic, dispatches on `x`)
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_factor_levels <- new_generic(
  name = "check_factor_levels",
  dispatch_args = "x"
)
# %% get_factor_names ----
#' Get factor names
#'
#' Returns the names of all columns of `x` that are factors.
#'
#' @param x tabular data.
#'
#' @return Character vector of factor names. Empty if `x` has no factor columns.
#'
#' @author EDG
#' @export
#'
#' @examples
#' get_factor_names(iris)
get_factor_names <- new_generic("get_factor_names", "x", function(x) {
  S7_dispatch()
}) # /rtemis::get_factor_names
method(get_factor_names, class_data.frame) <- function(x) {
  # vapply() always yields a logical vector. sapply() returns `list()` for
  # zero-column input, which is not a valid subscript.
  is_factor_col <- vapply(x, is.factor, logical(1))
  names(x)[is_factor_col]
}
# %% calibrate ----
#' Calibrate `Classification` & `ClassificationRes` Models
#'
#' @description
#' Generic function to calibrate binary classification models.
#'
#' @param x `Classification` or `ClassificationRes` object to calibrate.
#' @param algorithm Character: Algorithm to use to train calibration model.
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param verbosity Integer: Verbosity level.
#' @param ... Additional arguments passed to specific methods.
#'
#' @section Method-specific parameters:
#'
#' **For `Classification` objects:**
#' * `predicted_probabilities`: Numeric vector of predicted probabilities
#' * `true_labels`: Factor of true class labels
#'
#' **For `ClassificationRes` objects:**
#' * `resampler_config`: `ResamplerConfig` object for calibration training
#' * `train_verbosity`: Integer controlling calibration model training output
#'
#' @details
#' Calibration adjusts the predicted probabilities of a binary classification
#' model so that they better reflect the true probabilities (i.e. empirical risk) of the
#' positive class.
#'
#' @return Calibrated model object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # --- Calibrate Classification ---
#' dat <- iris[51:150, ]
#' res <- resample(dat)
#' dat$Species <- factor(dat$Species)
#' dat_train <- dat[res[[1]], ]
#' dat_test <- dat[-res[[1]], ]
#'
#' # Train GLM on a training/test split
#' mod_c_glm <- train(
#'   x = dat_train,
#'   dat_test = dat_test,
#'   algorithm = "glm"
#' )
#'
#' # Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`,
#' # in this case using the training data, but it could be a separate calibration dataset.
#' mod_c_glm_cal <- calibrate(
#'   mod_c_glm,
#'   predicted_probabilities = mod_c_glm$predicted_prob_training,
#'   true_labels = mod_c_glm$y_training
#' )
#' mod_c_glm_cal
#'
#' # --- Calibrate ClassificationRes ---
#'
#' # Train GLM with cross-validation
#' resmod_c_glm <- train(
#'   x = dat,
#'   algorithm = "glm",
#'   outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
#' )
#'
#' # Calibrate the `ClassificationRes` using the same resampling configuration as used for training.
#' resmod_c_glm_cal <- calibrate(resmod_c_glm)
#' resmod_c_glm_cal
calibrate <- new_generic(
  name = "calibrate",
  dispatch_args = "x",
  fun = function(
    x,
    algorithm = "isotonic",
    hyperparameters = NULL,
    verbosity = 1L,
    ...
  ) {
    S7_dispatch()
  }
) # /rtemis::calibrate
# %% freeze ----
#' Freeze Hyperparameters (internal generic, dispatches on `x`)
#'
#' @param x `Hyperparameters` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
freeze <- new_generic(name = "freeze", dispatch_args = "x")
# %% lock ----
#' Lock Hyperparameters (internal generic, dispatches on `x`)
#'
#' @param x `Hyperparameters` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
lock <- new_generic(name = "lock", dispatch_args = "x")
# %% needs_tuning ----
#' needs_tuning (internal generic, dispatches on `x`)
#'
#' @keywords internal
#' @noRd
needs_tuning <- new_generic(name = "needs_tuning", dispatch_args = "x")
# %% get_factor_levels ----
# NOTE(review): `get_factor_levels` and both of its methods are also defined
# earlier in this file. This later assignment replaces the earlier generic
# object when the file is sourced; one of the two definitions should be removed.
#' @name get_factor_levels
#'
#' @title
#' Get factor levels from data.frame or similar
#'
#' @usage
#' get_factor_levels(x)
#'
#' @param x tabular data.
#'
#' @return Named list of factor levels. Names correspond to column names.
#' An empty list is returned when `x` has no factor columns.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_factor_levels <- new_generic(
  "get_factor_levels",
  "x",
  function(x) S7_dispatch()
)
method(get_factor_levels, class_data.frame) <- function(x) {
  # vapply() always returns a logical vector. sapply() returns `list()` for
  # zero-column input, which makes which() fail.
  factor_index <- which(vapply(x, is.factor, logical(1)))
  lapply(x[, factor_index, drop = FALSE], levels)
}
method(get_factor_levels, class_data.table) <- function(x) {
  factor_index <- which(vapply(x, is.factor, logical(1)))
  # with = FALSE slightly more performant than using .SD
  lapply(x[, factor_index, with = FALSE], levels)
}
# %% is_tuned ----
# S7 generic dispatching on `x`.
is_tuned <- new_generic(name = "is_tuned", dispatch_args = "x")
# %% get_tuned_status ----
# S7 generic dispatching on `x`.
get_tuned_status <- new_generic(name = "get_tuned_status", dispatch_args = "x")
# %% one_hot ----
# S7 generic dispatching on `x`.
one_hot <- new_generic(name = "one_hot", dispatch_args = "x")
# --- Custom S7 validators -------------------------------------------------------------------------
# %% scalar_dbl ----
#' Scalar double
#'
#' S7 property accepting `NULL` or a double of length 1.
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    # NULL is allowed and needs no further checks.
    if (is.null(value)) {
      return(NULL)
    }
    if (length(value) != 1) {
      return("must be a scalar double.")
    }
    if (!is.double(value)) {
      return("must be double.")
    }
    NULL
  }
) # /rtemis::scalar_dbl
# %% scalar_dbl_01excl ----
#' Scalar double between 0 and 1, exclusive
#'
#' S7 property accepting `NULL` or a single non-missing double strictly
#' between 0 and 1.
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl_01excl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    # NULL is allowed and needs no further checks.
    if (is.null(value)) {
      return(NULL)
    }
    if (length(value) != 1) {
      return("must be a scalar double.")
    }
    # Comparisons with NA/NaN yield NA, which `if` cannot evaluate; report it
    # as a validation message instead of failing inside the validator.
    if (is.na(value)) {
      return("must not be NA.")
    }
    if (value <= 0 || value >= 1) {
      return("must be between > 0 and < 1.")
    }
    NULL
  }
) # /rtemis::scalar_dbl_01excl
# %% scalar_dbl_01incl ----
#' Scalar double between 0 and 1, inclusive
#'
#' S7 property accepting `NULL` or a single non-missing double in `[0, 1]`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl_01incl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    # NULL is allowed and needs no further checks.
    if (is.null(value)) {
      return(NULL)
    }
    if (length(value) != 1) {
      return("must be a scalar double.")
    }
    # Comparisons with NA/NaN yield NA, which `if` cannot evaluate; report it
    # as a validation message instead of failing inside the validator.
    if (is.na(value)) {
      return("must not be NA.")
    }
    if (value < 0 || value > 1) {
      return("must be between >= 0 and <= 1.")
    }
    NULL
  }
) # /rtemis::scalar_dbl_01incl
# %% scalar_int ----
#' Scalar integer
#'
#' S7 property accepting `NULL` or an integer of length 1.
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_int <- S7::new_property(
  class = S7::class_integer | NULL,
  validator = function(value) {
    # NULL is allowed; otherwise only the length is checked here.
    if (is.null(value)) {
      return(NULL)
    }
    if (length(value) != 1) {
      return("must be a scalar integer.")
    }
    NULL
  }
) # /rtemis::scalar_int
# %% scalar_int_pos ----
#' Scalar non-negative integer
#'
#' S7 property accepting `NULL` or a single non-missing integer `>= 0`.
#'
#' NOTE(review): despite the `_pos` suffix and the "positive" wording of the
#' length message, the range check accepts zero. The accepted range and the
#' messages are kept as they were; confirm whether zero is meant to be valid.
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_int_pos <- S7::new_property(
  class = S7::class_integer | NULL,
  validator = function(value) {
    # NULL is allowed and needs no further checks.
    if (is.null(value)) {
      return(NULL)
    }
    if (length(value) != 1) {
      return("must be a positive integer scalar.")
    }
    # `NA < 0` is NA, which `if` cannot evaluate; report it as a validation
    # message instead of failing inside the validator.
    if (is.na(value)) {
      return("must not be NA.")
    }
    if (value < 0) {
      return("must be >= 0.")
    }
    NULL
  }
) # /rtemis::scalar_int_pos
# %% preprocessed ----
#' Get preprocessed data from `Preprocessor`.
#'
#' S7 generic that returns the preprocessed data held by a `Preprocessor` object.
#'
#' @param x `Preprocessor`: A `Preprocessor` object.
#'
#' @return data.frame: The preprocessed data.
#'
#' @export
#'
#' @examples
#' prp <- preprocess(iris, setup_Preprocessor(scale = TRUE, center = TRUE))
#' preprocessed(prp)
preprocessed <- new_generic(
  name = "preprocessed",
  dispatch_args = "x",
  fun = function(x) S7_dispatch()
) # /rtemis::preprocessed
# --- Internal functions ---------------------------------------------------------------------------
#' Get output type
#'
#' Resolve the output type to use when printing text.
#'
#' Resolution order:
#' 1. If `filename` is not NULL, the result is always "plain".
#' 2. If `output_type` is NULL, the result is "ansi" in an interactive session
#'    and "plain" otherwise.
#' 3. Otherwise `output_type` is matched against the allowed values.
#'
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#' @param filename Character: Filename for output.
#'
#' @return Character with selected output type.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
get_output_type <- function(
  output_type = c("ansi", "html", "plain"),
  filename = NULL
) {
  if (!is.null(filename)) {
    "plain"
  } else if (is.null(output_type)) {
    if (interactive()) "ansi" else "plain"
  } else {
    match.arg(output_type)
  }
} # /rtemis::get_output_type
# %% S7_to_list ----
# Recursively convert an S7 object to a plain list: an S7 object is replaced by
# its props, and every list (including that props list) is walked element-wise.
# Non-list values are returned unchanged.
S7_to_list <- function(x) {
  out <- if (S7_inherits(x)) props(x) else x
  if (!is.list(out)) {
    return(out)
  }
  lapply(out, S7_to_list)
} # /rtemis::S7_to_list
# %% toml_empty_to_null ----
# Recursively normalize a nested list:
# - non-list values are returned unchanged;
# - empty lists become NULL;
# - unnamed lists made up only of length-1 atomic values are collapsed to an
#   unnamed atomic vector;
# - any other list is processed element-wise.
toml_empty_to_null <- function(x) {
  if (!is.list(x)) {
    return(x)
  }
  if (length(x) == 0L) {
    return(NULL)
  }
  is_scalar_atomic <- function(el) {
    is.atomic(el) && length(el) == 1L && !is.null(el)
  }
  # Only unnamed lists are candidates for collapsing to a vector.
  if (is.null(names(x)) && all(vapply(x, is_scalar_atomic, logical(1)))) {
    return(unlist(x, use.names = FALSE))
  }
  lapply(x, toml_empty_to_null)
} # /rtemis::toml_empty_to_null
# %% write_lines ----
#' Write lines to file
#'
#' Normalizes the path, refuses to replace an existing file unless `overwrite`
#' is set, creates the target directory if necessary, writes the lines, and
#' verifies that the file exists afterwards.
#'
#' @param x Character: Text to write to file.
#' @param file Character: Path to output file.
#' @param overwrite Logical: If TRUE, overwrite `file` if it already exists;
#' otherwise an existing file is an error.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Invisible NULL. Called for side effect of writing to file.
#'
#' @author EDG
#' @keywords internal
#' @noRd
write_lines <- function(x, file, overwrite = FALSE, verbosity = 1L) {
  file <- normalizePath(file, mustWork = FALSE)

  # Existing file: either announce the overwrite or stop.
  if (file.exists(file)) {
    if (overwrite) {
      if (verbosity >= 1L) {
        notice <- paste("Overwriting existing file:", file)
        msg(fmt(notice, col = rtemis_colors[["orange"]]))
      }
    } else {
      cli::cli_abort(
        "File already exists: {file}. Set `overwrite = TRUE` to overwrite."
      )
    }
  }

  # Make sure the parent directory exists.
  # The local name `dir` is referenced by the cli message template below.
  dir <- dirname(file)
  if (!dir.exists(dir)) {
    dir.create(dir, recursive = TRUE)
    if (!dir.exists(dir)) {
      cli::cli_abort("Failed to create directory: {dir}")
    }
    if (verbosity >= 1L) {
      msg(checkmark(), "Created directory:", dir)
    }
  }

  # Write, then confirm the file is there.
  writeLines(x, con = file)
  if (!file.exists(file)) {
    cli::cli_abort("Failed to create file: {file}")
  }
  if (verbosity >= 1L) {
    msg(checkmark(), "Created file:", file)
  }
  invisible(NULL)
} # /rtemis::write_lines
# %% toml_meta ----
#' @name
#' toml_meta
#'
#' @title
#' Write TOML metadata
#'
#' @description
#' Builds the named list that becomes the first TOML table, in the following format:
#'
#' ```toml
#' [_meta]
#' package = "rtemis"
#' package_version = "0.4.2"
#' schema_version = "1.0"
#' object_type = "SuperConfig"
#' created_at = "2026-02-11T22:45:00Z"
#' ```
#'
#' `created_at` is the current time in UTC, formatted as a character string.
#'
#' @param x Object to create metadata for. Class name will be included in metadata.
#' @param schema_version Character: Version of the schema to include in metadata.
#'
#' @return Named list containing metadata.
#'
#' @author EDG
#' @keywords internal
#' @noRd
toml_meta <- function(x, schema_version = "1.0") {
  created_at <- format(Sys.time(), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")
  meta <- list(
    package = "rtemis",
    package_version = as.character(packageVersion("rtemis")),
    schema_version = schema_version,
    object_type = S7_class(x)@name,
    created_at = created_at
  )
  list(`_meta` = meta)
} # /rtemis::toml_meta
# %% toml_with_meta ----
#' Create TOML string with metadata
#'
#' Builds the metadata for `x` with `toml_meta()`, rewrites it as a single-line inline table
#' (`_meta = { ... }`), and places it above the TOML serialization of `payload`, separated by a
#' blank line.
#'
#' @param x Object to describe in the metadata. Its class name is included in the metadata.
#' @param payload Object passed to `toml::write_toml()` to produce the body of the output.
#' @param schema_version Character: Version of the schema to include in metadata.
#'
#' @return Character string: inline `_meta` table, a blank line, then the TOML representation of
#' `payload`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
toml_with_meta <- function(x, payload, schema_version = "1.0") {
  # Serialize the metadata as a regular TOML table first ...
  meta_toml <- toml::write_toml(
    toml_meta(x, schema_version = schema_version)
  )
  # ... then keep only its key/value lines (drop blanks and the table header).
  entries <- strsplit(meta_toml, "\n", fixed = TRUE)[[1]]
  entries <- entries[!(entries == "" | entries == "[_meta]")]
  # Collapse the key/value lines into one inline table.
  inline_meta <- paste0("_meta = { ", paste(entries, collapse = ", "), " }")
  body_toml <- toml::write_toml(payload)
  paste(inline_meta, body_toml, sep = "\n\n")
} # /rtemis::toml_with_meta
================================================
FILE: R/01_ExecutionConfig.R
================================================
# ExecutionConfig.R
# ::rtemis::
# 2026- EDG rtemis.org
# %% ExecutionConfig ----
#' ExecutionConfig Class
#'
#' @description
#' Execution Configuration Class, defining sequential/parallel/distributed execution settings.
#'
#' @author EDG
#' @noRd
ExecutionConfig <- new_class(
  name = "ExecutionConfig",
  properties = list(
    backend = class_character,
    n_workers = class_integer,
    future_plan = class_character | NULL
  ),
  constructor = function(backend, n_workers, future_plan) {
    # Coerce/validate inputs before handing them to S7.
    n_workers <- clean_int(n_workers)
    check_character(backend, allow_null = FALSE)
    check_character(future_plan, allow_null = TRUE)
    new_object(
      S7::S7_object(),
      backend = backend,
      n_workers = n_workers,
      future_plan = future_plan
    )
  },
  validator = function(self) {
    # Returns the first violated rule as a message, or NULL when the object is valid.
    backend <- self@backend
    n_workers <- self@n_workers
    if (backend == "future" && is.null(self@future_plan)) {
      return("@future_plan must be set when backend is 'future'.")
    }
    if (backend == "none" && n_workers != 1L) {
      return("n_workers must be 1 when backend is 'none'.")
    }
    if (backend == "mirai" && n_workers < 1L) {
      return("n_workers must be at least 1 when backend is 'mirai'.")
    }
    if (backend == "future" && n_workers < 1L) {
      return("n_workers must be at least 1 when backend is 'future'.")
    }
    NULL
  }
) # /rtemis::ExecutionConfig
# %% repr.ExecutionConfig ----
# Build the string representation of an ExecutionConfig.
#
# x: `ExecutionConfig` object.
# pad: Integer: Left padding, passed on to `repr_S7name()` and `repr_ls()`.
# output_type: Output type, passed on to `repr_S7name()` and `repr_ls()`.
#
# Returns a character string: class-name header followed by the listed properties.
# The string is returned visibly (the previous version ended in an assignment, so the value
# was returned invisibly, unlike repr for Hyperparameters).
method(repr, ExecutionConfig) <- function(x, pad = 0L, output_type = NULL) {
  header <- repr_S7name("ExecutionConfig", pad = pad, output_type = output_type)
  .props <- props(x)
  # `future_plan` is only shown for the "future" backend.
  if (.props[["backend"]] != "future") {
    .props[["future_plan"]] <- NULL
  }
  paste0(
    header,
    repr_ls(.props, pad = pad, output_type = output_type)
  )
} # /rtemis::repr.ExecutionConfig
# %% print.ExecutionConfig ----
# Print an ExecutionConfig: writes its repr followed by a newline, returns `x` invisibly.
method(print, ExecutionConfig) <- function(x, output_type = NULL, ...) {
  rendered <- repr(x, output_type = output_type)
  cat(rendered, "\n")
  invisible(x)
} # /rtemis::print.ExecutionConfig
# %% --- User API ----
# %% setup_ExecutionConfig ----
#' Setup Execution Configuration
#'
#' @param backend Character: Execution backend: "future", "mirai", or "none".
#' @param n_workers Integer: Number of workers for parallel execution. Only used if `backend` is
#' "future" or "mirai". Do not rely on the default value, set to an appropriate number depending
#' on your system.
#' @param future_plan Character: Future plan to use if `backend` is "future".
#'
#' @return `ExecutionConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' setup_ExecutionConfig(backend = "future", n_workers = 4L, future_plan = "multisession")
setup_ExecutionConfig <- function(
  backend = c("future", "mirai", "none"),
  n_workers = NULL,
  future_plan = NULL
) {
  backend <- match.arg(backend)

  # Backend-specific dependency checks and defaults.
  switch(
    backend,
    future = {
      check_dependencies("futurize")
      check_character(future_plan, allow_null = TRUE)
      # Fall back to the `future.plan` option, then to "mirai_multisession".
      if (is.null(future_plan)) {
        future_plan <- getOption("future.plan", "mirai_multisession")
      }
      if (!future_plan %in% ALLOWED_PLANS) {
        cli::cli_abort(
          "{.val {future_plan}} is not an allowed future plan. Allowed plans: {.val {ALLOWED_PLANS}}."
        )
      }
      if (is.null(n_workers)) {
        n_workers <- parallelly::availableCores(omit = 3L)
      }
    },
    mirai = {
      check_dependencies("mirai")
      if (is.null(n_workers)) {
        n_workers <- parallelly::availableCores(omit = 3L)
      }
    },
    none = {
      # Sequential execution: exactly one worker.
      if (is.null(n_workers)) {
        n_workers <- 1L
      } else if (n_workers != 1L) {
        cli::cli_abort("n_workers must be 1 when backend is 'none'.")
      }
    }
  )

  n_workers <- clean_int(n_workers)
  if (n_workers < 1L) {
    cli::cli_abort("n_workers must be at least 1.")
  }

  # `future_plan` is only stored for the "future" backend.
  plan_to_store <- if (backend == "future") future_plan else NULL
  ExecutionConfig(
    backend = backend,
    n_workers = n_workers,
    future_plan = plan_to_store
  )
} # /rtemis::setup_ExecutionConfig
================================================
FILE: R/02_Hyperparameters.R
================================================
# S7_Hyperparameters.R
# ::rtemis::
# 2025- EDG rtemis.org
# References ----
# S7
# - https://github.com/RConsortium/S7
# - https://rconsortium.github.io/S7/
# LightGBM parameters
# - https://lightgbm.readthedocs.io/en/latest/Parameters.html
# %% Constants ----
# `tuned` values ----
# -9: Set by Tuner: Actively being tuned (Values fixed by Tuner).
# -2: Set by constructor: Not tunable (No tunable_hyperparameters).
# -1: Set by constructor: Not tunable (tunable_hyperparameters exist, but none of them have more than one value).
# 0: Set by constructor: Untuned but tunable (at least one of tunable_hyperparameters has more than one value).
# 1: Set by Tuner: Tuned (Started as 0, set to 1 when tuned).
# Integer codes stored in the `tuned` property of Hyperparameters objects.
TUNED_STATUS_TUNING <- -9L # actively being tuned
TUNED_STATUS_NOT_TUNABLE <- -2L # no tunable hyperparameters
TUNED_STATUS_NO_SEARCH_VALUES <- -1L # tunable hyperparameters exist, none has more than one value
TUNED_STATUS_UNTUNED <- 0L # untuned but tunable
TUNED_STATUS_TUNED <- 1L # tuned
# `resampled` values ----
# 0: Running on single training set.
# 1: Running on resampled training sets.
# %% Hyperparameters ----
#' @title Hyperparameters
#'
#' @description
#' Superclass for hyperparameters.
#'
#' @field algorithm Character: Algorithm name.
#' @field hyperparameters Named list of algorithm hyperparameter values.
#' @field tunable_hyperparameters Character: Names of tunable hyperparameters.
#' @field fixed_hyperparameters Character: Names of fixed hyperparameters.
#' @field tuned Integer: Tuning status.
#' @field resampled Integer: Outer resampling status.
#' @field n_workers Integer: Number of workers to use for tuning.
#'
#' @author EDG
#' @noRd
Hyperparameters <- new_class(
  name = "Hyperparameters",
  properties = list(
    algorithm = class_character,
    hyperparameters = class_list,
    tunable_hyperparameters = class_character,
    fixed_hyperparameters = class_character,
    tuned = class_integer,
    resampled = class_integer,
    n_workers = class_integer
  ),
  # Constructor: derives the initial `tuned` status from the supplied values and
  # always starts with `resampled = 0L`.
  constructor = function(
    algorithm,
    hyperparameters,
    tunable_hyperparameters,
    fixed_hyperparameters,
    n_workers = 1L
  ) {
    # Initial tuning status, expressed with the TUNED_STATUS_* constants defined
    # at the top of this file rather than bare integers.
    if (length(tunable_hyperparameters) > 0) {
      n_values <- lengths(hyperparameters[tunable_hyperparameters])
      tuned <- if (any(n_values > 1L)) {
        # Search values defined for tunable hyperparameters.
        TUNED_STATUS_UNTUNED
      } else {
        # No search values defined for tunable hyperparameters.
        TUNED_STATUS_NO_SEARCH_VALUES
      }
    } else {
      # No tunable hyperparameters
      tuned <- TUNED_STATUS_NOT_TUNABLE
    }
    # GLMNET: a NULL lambda marks the object as needing tuning.
    if (algorithm == "GLMNET") {
      if (is.null(hyperparameters[["lambda"]])) {
        tuned <- TUNED_STATUS_UNTUNED
      }
    }
    # LightGBM: a NULL nrounds marks the object as needing tuning.
    if (algorithm == "LightGBM") {
      if (is.null(hyperparameters[["nrounds"]])) {
        tuned <- TUNED_STATUS_UNTUNED
      }
    }
    # SVM: check the kernel-specific hyperparameter
    # (linear => cost, polynomial => degree, radial => sigma).
    if (algorithm == "SVM") {
      kernel <- hyperparameters[["kernel"]]
      if (kernel == "linear") {
        if (length(hyperparameters[["cost"]]) > 1) {
          tuned <- TUNED_STATUS_UNTUNED
        }
      } else if (kernel == "polynomial") {
        if (length(hyperparameters[["degree"]]) > 1) {
          tuned <- TUNED_STATUS_UNTUNED
        }
      } else if (kernel == "radial") {
        if (length(hyperparameters[["sigma"]]) > 1) {
          tuned <- TUNED_STATUS_UNTUNED
        }
      }
    }
    n_workers <- clean_posint(n_workers)
    new_object(
      S7_object(),
      algorithm = algorithm,
      hyperparameters = hyperparameters,
      tunable_hyperparameters = tunable_hyperparameters,
      fixed_hyperparameters = fixed_hyperparameters,
      tuned = tuned,
      resampled = 0L,
      n_workers = n_workers
    )
  }
) # /rtemis::Hyperparameters
# %% repr.Hyperparameters ----
#' Repr Hyperparameters
#'
#' repr method for Hyperparameters object.
#'
#' @param x `Hyperparameters` object.
#' @param pad Integer: Left padding for printed output.
#' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis. `-1` means no limit.
#' @param limit Integer: Limit number of items to show. `-1` means no limit.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @author EDG
#' @noRd
method(repr, Hyperparameters) <- function(
  x,
  pad = 0L,
  maxlength = -1L,
  limit = -1L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)

  # Header: "<algorithm>Hyperparameters"
  header <- repr_S7name(
    paste0(x@algorithm, "Hyperparameters"),
    pad = pad,
    output_type = output_type
  )
  # Body: every property except the first one
  listing <- repr_ls(
    props(x)[-1],
    pad = pad,
    maxlength = maxlength,
    limit = limit,
    output_type = output_type
  )
  out <- paste0(header, listing)

  # Pick the status line matching the current `tuned` code (NULL if the code is unknown).
  status <- x@tuned
  status_line <- if (status == TUNED_STATUS_TUNING) {
    "\n Hyperparameters are being tuned.\n"
  } else if (status == TUNED_STATUS_NOT_TUNABLE) {
    "\n No hyperparameters are tunable.\n"
  } else if (status == TUNED_STATUS_UNTUNED) {
    need_tuning <- names(get_hyperparams_need_tuning(x))
    paste0(
      "\n ",
      ngettext(length(need_tuning), "Hyperparameter ", "Hyperparameters "),
      oxfordcomma(
        need_tuning
      ),
      ngettext(length(need_tuning), " needs ", " need "),
      "tuning.\n"
    )
  } else if (status == TUNED_STATUS_NO_SEARCH_VALUES) {
    "\n No search values defined for tunable hyperparameters.\n"
  } else if (status == TUNED_STATUS_TUNED) {
    "\n Hyperparameters are tuned.\n"
  } else {
    NULL
  }

  if (is.null(status_line)) {
    return(out)
  }
  paste0(
    out,
    fmt(
      status_line,
      col = col_tuner,
      bold = TRUE,
      output_type = output_type
    )
  )
} # /rtemis::repr.Hyperparameters
# %% print.Hyperparameters ----
# Print a Hyperparameters object: writes its repr, returns `x` invisibly.
method(print, Hyperparameters) <- function(x, output_type = NULL, ...) {
  rendered <- repr(x, output_type = output_type)
  cat(rendered)
  invisible(x)
} # /rtemis::print.Hyperparameters
# %% is_tuned.Hyperparameters ----
# TRUE when the `tuned` status equals TUNED_STATUS_TUNED.
# Uses the named constant instead of the bare integer 1L.
method(is_tuned, Hyperparameters) <- function(x) {
  x@tuned == TUNED_STATUS_TUNED
} # /is_tuned.Hyperparameters
# %% get_tuned_status.Hyperparameters ----
# Recompute the tuning status from the current hyperparameter values.
#
# Returns one of:
# - TUNED_STATUS_UNTUNED: at least one tunable hyperparameter has more than one value.
# - TUNED_STATUS_NO_SEARCH_VALUES: tunable hyperparameters exist, none has more than one value.
# - TUNED_STATUS_NOT_TUNABLE: there are no tunable hyperparameters.
#
# NOTE(review): unlike the Hyperparameters constructor, this does not apply the
# GLMNET (NULL lambda), LightGBM (NULL nrounds) or SVM special cases; confirm that is intended.
method(get_tuned_status, Hyperparameters) <- function(x) {
  tunable <- x@tunable_hyperparameters
  if (length(tunable) == 0) {
    return(TUNED_STATUS_NOT_TUNABLE)
  }
  if (any(lengths(x@hyperparameters[tunable]) > 1L)) {
    TUNED_STATUS_UNTUNED
  } else {
    TUNED_STATUS_NO_SEARCH_VALUES
  }
} # /rtemis::get_tuned_status.Hyperparameters
# %% update.Hyperparameters ----
#' Update Hyperparameters
#'
#' Overwrites entries of `object@hyperparameters` by name and refreshes the `tuned` status.
#'
#' @param object `Hyperparameters` object.
#' @param hyperparameters Named list of algorithm hyperparameter values.
#' @param tuned Integer or NULL: Tuning status to set. If NULL, the status is recomputed with
#' `get_tuned_status()`.
#' @param ... Not used.
#'
#' @return Updated `Hyperparameters` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(update, Hyperparameters) <- function(
  object,
  hyperparameters,
  tuned = NULL,
  ...
) {
  # Apply the new values on a local copy of the list, then store it back once.
  current <- object@hyperparameters
  for (nm in names(hyperparameters)) {
    current[[nm]] <- hyperparameters[[nm]]
  }
  object@hyperparameters <- current
  # Update tuned status
  object@tuned <- if (is.null(tuned)) get_tuned_status(object) else tuned
  object
} # /rtemis::update.Hyperparameters
# %% freeze.Hyperparameters ----
# Mark hyperparameters as having no search values (TUNED_STATUS_NO_SEARCH_VALUES).
#
# Returns the modified object. The previous body ended in the assignment, so it
# returned the integer status instead of the object and the change to `x` was lost.
# Callers must use the return value, e.g. `hp <- freeze(hp)`.
method(freeze, Hyperparameters) <- function(x) {
  x@tuned <- TUNED_STATUS_NO_SEARCH_VALUES
  x
} # /rtemis::freeze.Hyperparameters
# %% lock.Hyperparameters ----
# Mark hyperparameters as tuned (TUNED_STATUS_TUNED).
#
# Returns the modified object. The previous body ended in the assignment, so it
# returned the integer status instead of the object and the change to `x` was lost.
# Callers must use the return value, e.g. `hp <- lock(hp)`.
method(lock, Hyperparameters) <- function(x) {
  x@tuned <- TUNED_STATUS_TUNED
  x
} # /rtemis::lock.Hyperparameters
# %% `$`.Hyperparameters ----
# Make Hyperparameters@hyperparameters@name `$`-accessible
# `x$name` returns the entry called `name` from the hyperparameters list.
method(`$`, Hyperparameters) <- function(x, name) {
  values <- x@hyperparameters
  values[[name]]
}
# %% `.DollarNames`.Hyperparameters ----
# `$`-autocomplete Hyperparameters@hyperparameters
# Completion candidates for `$`: hyperparameter names matching `pattern`.
method(`.DollarNames`, Hyperparameters) <- function(x, pattern = "") {
  grep(pattern, names(x@hyperparameters), value = TRUE)
}
# %% `[[`.Hyperparameters ----
# Make Hyperparameters@hyperparameters@name `[[`-accessible
# `x[[name]]` returns the entry called `name` from the hyperparameters list.
method(`[[`, Hyperparameters) <- function(x, name) {
  values <- x@hyperparameters
  values[[name]]
}
# %% needs_tuning.Hyperparameters ----
# TRUE when the `tuned` status equals TUNED_STATUS_UNTUNED.
# Uses the named constant instead of the bare number 0.
method(needs_tuning, Hyperparameters) <- function(x) {
  x@tuned == TUNED_STATUS_UNTUNED
} # /rtemis::needs_tuning.Hyperparameters
# %% get_hyperparams_need_tuning.Hyperparameters ----
#' Get hyperparameters that need tuning in an algorithm-specific way.
#'
#' @keywords internal
#' @noRd
method(get_hyperparams_need_tuning, Hyperparameters) <- function(x) {
  # -> list
  # Count the values supplied for each tunable hyperparameter ...
  tunable <- x@tunable_hyperparameters
  n_values <- sapply(x@hyperparameters[tunable], length)
  # ... and keep those with more than one value.
  x@hyperparameters[tunable[n_values > 1]]
} # /get_hyperparams_need_tuning.Hyperparameters
# %% get_hyperparams.(Hyperparameters, class_character) ----
# Look up hyperparameter entries by name.
# Each element of `param_names` is used to single-bracket-subset x@hyperparameters;
# sapply() then combines the per-name results (USE.NAMES = FALSE, so result names are
# not taken from `param_names`).
# NOTE(review): for non-empty input this looks equivalent to
# x@hyperparameters[param_names]; confirm before simplifying.
method(get_hyperparams, list(Hyperparameters, class_character)) <- function(
x,
param_names
) {
sapply(param_names, function(p) x@hyperparameters[p], USE.NAMES = FALSE)
} # /rtemis::get_hyperparams.Hyperparameters
# %% GLMHyperparameters ----
#' @author EDG
#'
#' @keywords internal
#' @noRd
GLMHyperparameters <- new_class(
  name = "GLMHyperparameters",
  parent = Hyperparameters,
  constructor = function(ifw) {
    # GLM has a single hyperparameter, `ifw`, declared tunable; nothing is fixed.
    glm_values <- list(ifw = ifw)
    base <- Hyperparameters(
      algorithm = "GLM",
      hyperparameters = glm_values,
      tunable_hyperparameters = "ifw",
      fixed_hyperparameters = character()
    )
    new_object(base)
  } # /constructor
) # /rtemis::GLMHyperparameters
# %% setup_GLM ----
#' Setup GLM Hyperparameters
#'
#' Setup hyperparameters for GLM training.
#'
#' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GLMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' glm_hyperparams <- setup_GLM(ifw = TRUE)
#' glm_hyperparams
setup_GLM <- function(ifw = FALSE) {
# Delegates directly to the class constructor; `ifw` is passed through unchanged.
GLMHyperparameters(ifw = ifw)
}
# %% GAMHyperparameters ----
# Names of GAM hyperparameters by role.
GAM_tunable <- c("k", "ifw")
GAM_fixed <- character()

#' @title GAMHyperparameters
#'
#' @description
#' Hyperparameters subclass for GAM.
#'
#' @author EDG
#' @keywords internal
#' @noRd
GAMHyperparameters <- new_class(
  name = "GAMHyperparameters",
  parent = Hyperparameters,
  constructor = function(k, ifw) {
    gam_values <- list(
      k = k,
      ifw = ifw
    )
    base <- Hyperparameters(
      algorithm = "GAM",
      hyperparameters = gam_values,
      tunable_hyperparameters = GAM_tunable,
      fixed_hyperparameters = GAM_fixed
    )
    new_object(base)
  } # /constructor
) # /rtemis::GAMHyperparameters
# %% setup_GAM ----
#' Setup GAM Hyperparameters
#'
#' Setup hyperparameters for GAM training.
#'
#' Get more information from [mgcv::gam].
#'
#' @param k (Tunable) Integer: Number of knots.
#' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GAMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' gam_hyperparams <- setup_GAM(k = 5L, ifw = FALSE)
#' gam_hyperparams
setup_GAM <- function(k = 5L, ifw = FALSE) {
# `k` goes through clean_posint() before use; `ifw` is passed through unchanged.
k <- clean_posint(k)
GAMHyperparameters(k = k, ifw = ifw)
}
# %% CARTHyperparameters ----
# Names of CART hyperparameters by role.
CART_tunable <- c("cp", "maxdepth", "minsplit", "minbucket", "prune_cp", "ifw")
CART_fixed <- c(
  "method",
  "model",
  "maxcompete",
  "maxsurrogate",
  "usesurrogate",
  "surrogatestyle",
  "xval",
  "cost"
)

#' @title CARTHyperparameters
#'
#' @description
#' Hyperparameters subclass for CART. Collects all CART settings into the `hyperparameters`
#' list of the parent class.
#'
#' @author EDG
#' @keywords internal
#' @noRd
CARTHyperparameters <- new_class(
  name = "CARTHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    cp,
    maxdepth,
    minsplit,
    minbucket,
    prune_cp,
    method,
    model,
    maxcompete,
    maxsurrogate,
    usesurrogate,
    surrogatestyle,
    xval,
    cost,
    ifw
  ) {
    cart_values <- list(
      cp = cp,
      maxdepth = maxdepth,
      minsplit = minsplit,
      minbucket = minbucket,
      prune_cp = prune_cp,
      method = method,
      model = model,
      maxcompete = maxcompete,
      maxsurrogate = maxsurrogate,
      usesurrogate = usesurrogate,
      surrogatestyle = surrogatestyle,
      xval = xval,
      cost = cost,
      ifw = ifw
    )
    base <- Hyperparameters(
      algorithm = "CART",
      hyperparameters = cart_values,
      tunable_hyperparameters = CART_tunable,
      fixed_hyperparameters = CART_fixed
    )
    new_object(base)
  } # /constructor
) # /rtemis::CARTHyperparameters
# %% setup_CART ----
#' Setup CART Hyperparameters
#'
#' Setup hyperparameters for CART training.
#'
#' Get more information from [rpart::rpart] and [rpart::rpart.control].
#'
#' @param cp (Tunable) Numeric: Complexity parameter.
#' @param maxdepth (Tunable) Integer: Maximum depth of tree.
#' @param minsplit (Tunable) Integer: Minimum number of observations in a node to split.
#' @param minbucket (Tunable) Integer: Minimum number of observations in a terminal node.
#' @param prune_cp (Tunable) Numeric: Complexity for cost-complexity pruning after tree is built
#' @param method String: Splitting method.
#' @param model Logical: If TRUE, return a model.
#' @param maxcompete Integer: Maximum number of competitive splits.
#' @param maxsurrogate Integer: Maximum number of surrogate splits.
#' @param usesurrogate Integer: Number of surrogate splits to use.
#' @param surrogatestyle Integer: Type of surrogate splits.
#' @param xval Integer: Number of cross-validation folds.
#' @param cost Numeric (>=0): One for each feature.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return CARTHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' cart_hyperparams <- setup_CART(cp = 0.01, maxdepth = 10L, ifw = TRUE)
#' cart_hyperparams
setup_CART <- function(
# tunable
cp = 0.01,
maxdepth = 20L,
minsplit = 2L,
minbucket = 1L, # round(minsplit / 3),
prune_cp = NULL,
# fixed
method = "auto",
model = TRUE,
maxcompete = 4L,
maxsurrogate = 5L,
usesurrogate = 2L,
surrogatestyle = 0L,
xval = 0L,
cost = NULL,
# NOTE: `ifw` is listed in CART_tunable even though it appears after the fixed arguments.
ifw = FALSE
) {
# Validate / coerce arguments in signature order.
# Integer-like arguments are passed through clean_int(); the rest are class-checked.
check_inherits(cp, "numeric")
maxdepth <- clean_int(maxdepth)
minsplit <- clean_int(minsplit)
minbucket <- clean_int(minbucket)
check_inherits(prune_cp, "numeric")
check_inherits(method, "character")
check_inherits(model, "logical")
maxcompete <- clean_int(maxcompete)
maxsurrogate <- clean_int(maxsurrogate)
usesurrogate <- clean_int(usesurrogate)
surrogatestyle <- clean_int(surrogatestyle)
xval <- clean_int(xval)
check_inherits(cost, "numeric")
# `ifw` is passed through without a check here.
CARTHyperparameters(
cp = cp,
maxdepth = maxdepth,
minsplit = minsplit,
minbucket = minbucket,
prune_cp = prune_cp,
method = method,
model = model,
maxcompete = maxcompete,
maxsurrogate = maxsurrogate,
usesurrogate = usesurrogate,
surrogatestyle = surrogatestyle,
xval = xval,
cost = cost,
ifw = ifw
)
} # /rtemis::setup_CART
# Test that all CART hyperparameters are set by setup_CART
# (runs when this file is sourced; fails if a declared name is missing from the signature)
stopifnot(all(c(CART_tunable, CART_fixed) %in% names(formals(setup_CART))))
# %% GLMNETHyperparameters ----
# Names of GLMNET hyperparameters by role.
# NOTE(review): `lambda` is stored in the hyperparameters list but appears in neither vector.
GLMNET_tunable <- c("alpha", "ifw")
GLMNET_fixed <- c(
  "family",
  "offset",
  "which_lambda_cv",
  "nlambda",
  "penalty_factor",
  "standardize",
  "intercept"
)

#' @title GLMNETHyperparameters
#'
#' @description
#' Hyperparameters subclass for GLMNET. Validates a subset of its inputs, then collects all
#' settings into the `hyperparameters` list of the parent class.
#'
#' @author EDG
#' @keywords internal
#' @noRd
GLMNETHyperparameters <- new_class(
  name = "GLMNETHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    alpha,
    family,
    offset,
    which_lambda_cv,
    nlambda,
    lambda,
    penalty_factor,
    standardize,
    intercept,
    ifw
  ) {
    # Input checks
    check_float01inc(alpha)
    check_inherits(which_lambda_cv, "character")
    nlambda <- clean_posint(nlambda)
    check_inherits(penalty_factor, "numeric")
    check_inherits(standardize, "logical")

    glmnet_values <- list(
      alpha = alpha,
      family = family,
      offset = offset,
      which_lambda_cv = which_lambda_cv,
      nlambda = nlambda,
      lambda = lambda,
      penalty_factor = penalty_factor,
      standardize = standardize,
      intercept = intercept,
      ifw = ifw
    )
    base <- Hyperparameters(
      algorithm = "GLMNET",
      hyperparameters = glmnet_values,
      tunable_hyperparameters = GLMNET_tunable,
      fixed_hyperparameters = GLMNET_fixed
    )
    new_object(base)
  } # /constructor
) # /rtemis::GLMNETHyperparameters
#' Setup GLMNET Hyperparameters
#'
#' Setup hyperparameters for GLMNET training.
#'
#' Get more information from [glmnet::glmnet].
#'
#' @param alpha (Tunable) Numeric: Mixing parameter.
#' @param family Character: Family for GLMNET.
#' @param offset Numeric: Offset for GLMNET.
#' @param which_lambda_cv Character: Which lambda to use for prediction:
#' "lambda.1se" or "lambda.min"
#' @param nlambda Positive integer: Number of lambda values.
#' @param lambda Numeric: Lambda values.
#' @param penalty_factor Numeric: Penalty factor for each feature.
#' @param standardize Logical: If TRUE, standardize features.
#' @param intercept Logical: If TRUE, include intercept.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GLMNETHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' glm_hyperparams <- setup_GLMNET(alpha = 1, ifw = TRUE)
#' glm_hyperparams
setup_GLMNET <- function(
# tunable
alpha = 1,
# fixed
family = NULL,
offset = NULL,
which_lambda_cv = "lambda.1se",
nlambda = 100L,
lambda = NULL,
penalty_factor = NULL,
standardize = TRUE,
intercept = TRUE,
# NOTE: `ifw` is listed in GLMNET_tunable even though it appears after the fixed arguments.
ifw = TRUE
) {
# Validate / coerce arguments; `family`, `offset`, `lambda` and `intercept` are passed
# through without a check here.
check_float01inc(alpha)
check_inherits(which_lambda_cv, "character")
nlambda <- clean_posint(nlambda)
check_inherits(penalty_factor, "numeric")
check_logical(standardize)
check_logical(ifw)
GLMNETHyperparameters(
family = family,
offset = offset,
alpha = alpha,
which_lambda_cv = which_lambda_cv,
nlambda = nlambda,
lambda = lambda,
penalty_factor = penalty_factor,
standardize = standardize,
intercept = intercept,
ifw = ifw
)
} # /rtemis::setup_GLMNET
# Test that all GLMNET hyperparameters are set by setup_GLMNET
# (runs when this file is sourced; fails if a declared name is missing from the signature)
stopifnot(all(
c(GLMNET_tunable, GLMNET_fixed) %in% names(formals(setup_GLMNET))
))
# GLMNET-specific variant: in addition to tunable hyperparameters with more than one
# value, `lambda` is reported (as a NULL entry) when it has not been set.
method(get_hyperparams_need_tuning, GLMNETHyperparameters) <- function(x) {
  tunable <- x@tunable_hyperparameters
  n_values <- sapply(x@hyperparameters[tunable], length)
  need_tuning <- x@hyperparameters[tunable[n_values > 1]]
  if (!is.null(x[["lambda"]])) {
    return(need_tuning)
  }
  c(need_tuning, list(lambda = NULL))
} # /rtemis::get_hyperparams_need_tuning.GLMNETHyperparameters
# %% LightCARTHyperparameters ----
# Names of LightCART hyperparameters by role.
LightCART_tunable <- c(
  "num_leaves",
  "max_depth",
  "lambda_l1",
  "lambda_l2",
  "min_data_in_leaf",
  "max_cat_threshold",
  "min_data_per_group",
  "linear_tree",
  "ifw"
)
LightCART_fixed <- c("objective")

#' @title LightCARTHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightCART. Collects all settings into the `hyperparameters`
#' list of the parent class.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightCARTHyperparameters <- new_class(
  name = "LightCARTHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    num_leaves,
    max_depth,
    lambda_l1,
    lambda_l2,
    min_data_in_leaf,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    objective,
    ifw
  ) {
    lightcart_values <- list(
      num_leaves = num_leaves,
      max_depth = max_depth,
      lambda_l1 = lambda_l1,
      lambda_l2 = lambda_l2,
      min_data_in_leaf = min_data_in_leaf,
      max_cat_threshold = max_cat_threshold,
      min_data_per_group = min_data_per_group,
      linear_tree = linear_tree,
      objective = objective,
      ifw = ifw
    )
    base <- Hyperparameters(
      algorithm = "LightCART",
      hyperparameters = lightcart_values,
      tunable_hyperparameters = LightCART_tunable,
      fixed_hyperparameters = LightCART_fixed
    )
    new_object(base)
  } # /constructor
) # /rtemis::LightCARTHyperparameters
# %% setup_LightCART ----
#' Setup LightCART Hyperparameters
#'
#' Setup hyperparameters for LightCART training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param min_data_in_leaf (Tunable) Positive integer: Minimum number of data in a leaf.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree (Tunable) Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return LightCARTHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightcart_hyperparams <- setup_LightCART(num_leaves = 32L, ifw = FALSE)
#' lightcart_hyperparams
setup_LightCART <- function(
num_leaves = 32L,
max_depth = -1L,
lambda_l1 = 0,
lambda_l2 = 0,
min_data_in_leaf = 20L,
max_cat_threshold = 32L,
min_data_per_group = 100L,
linear_tree = FALSE,
objective = NULL,
ifw = FALSE
) {
# Validate / coerce arguments in signature order.
# `objective` and `ifw` are passed through without a check here.
num_leaves <- clean_posint(num_leaves)
max_depth <- clean_int(max_depth)
check_float0pos(lambda_l1)
check_float0pos(lambda_l2)
min_data_in_leaf <- clean_posint(min_data_in_leaf)
max_cat_threshold <- clean_posint(max_cat_threshold)
min_data_per_group <- clean_posint(min_data_per_group)
check_logical(linear_tree)
LightCARTHyperparameters(
num_leaves = num_leaves,
max_depth = max_depth,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
min_data_in_leaf = min_data_in_leaf,
max_cat_threshold = max_cat_threshold,
min_data_per_group = min_data_per_group,
linear_tree = linear_tree,
objective = objective,
ifw = ifw
)
} # /rtemis::setup_LightCART
# %% LightRFHyperparameters ----
# Names of LightRF hyperparameters by role.
# NOTE(review): `linear_tree` is stored in the hyperparameters list but appears in
# neither vector.
LightRF_tunable <- c(
  "nrounds",
  "num_leaves",
  "max_depth",
  "feature_fraction",
  "subsample",
  "lambda_l1",
  "lambda_l2",
  "max_cat_threshold",
  "min_data_per_group",
  "ifw"
)
LightRF_fixed <- c(
  "objective",
  "device_type",
  "tree_learner",
  "boosting_type",
  "learning_rate",
  "subsample_freq",
  "early_stopping_rounds",
  "force_col_wise"
)

#' @title LightRFHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightRF. In addition to the user-supplied settings, the
#' constructor hard-codes `boosting_type`, `learning_rate`, `subsample_freq` and
#' `early_stopping_rounds`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightRFHyperparameters <- new_class(
  name = "LightRFHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    nrounds,
    num_leaves,
    max_depth,
    feature_fraction,
    subsample,
    lambda_l1,
    lambda_l2,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    ifw,
    # fixed
    objective,
    device_type,
    tree_learner,
    force_col_wise
  ) {
    lightrf_values <- list(
      nrounds = nrounds,
      num_leaves = num_leaves,
      max_depth = max_depth,
      feature_fraction = feature_fraction,
      subsample = subsample,
      lambda_l1 = lambda_l1,
      lambda_l2 = lambda_l2,
      max_cat_threshold = max_cat_threshold,
      min_data_per_group = min_data_per_group,
      linear_tree = linear_tree,
      ifw = ifw,
      # fixed
      objective = objective,
      device_type = device_type,
      tree_learner = tree_learner,
      force_col_wise = force_col_wise,
      # unsettable: LightGBM params for RF
      boosting_type = "rf",
      learning_rate = 1, # no effect? in boosting_type 'rf', but set for clarity
      subsample_freq = 1L, # a.k.a. bagging_freq
      early_stopping_rounds = -1L
    )
    base <- Hyperparameters(
      algorithm = "LightRF",
      hyperparameters = lightrf_values,
      tunable_hyperparameters = LightRF_tunable,
      fixed_hyperparameters = LightRF_fixed
    )
    new_object(base)
  }
) # /rtemis::LightRFHyperparameters
# %% setup_LightRF ----
#' Setup LightRF Hyperparameters
#'
#' Setup hyperparameters for LightRF training.
#'
#' Get more information from [lightgbm::lgb.train].
#' Note that hyperparameters `boosting_type`, `learning_rate`, `subsample_freq`, and
#' `early_stopping_rounds` are fixed and cannot be set, because they are what makes `lightgbm`
#' train a random forest.
#' These can all be set when training gradient boosting with LightGBM.
#'
#' @param nrounds (Tunable) Positive integer: Number of boosting rounds.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param feature_fraction (Tunable) Numeric: Fraction of features to use.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#' @param device_type Character: "cpu" or "gpu".
#' @param tree_learner Character: "serial", "feature", "data", or "voting".
#' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building.
#'
#' @return LightRFHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightrf_hyperparams <- setup_LightRF(nrounds = 1000L, ifw = FALSE)
#' lightrf_hyperparams
setup_LightRF <- function(
nrounds = 500L,
num_leaves = 4096L,
max_depth = -1L,
feature_fraction = 0.7,
subsample = .623, # a.k.a. bagging_fraction
lambda_l1 = 0,
lambda_l2 = 0,
max_cat_threshold = 32L,
min_data_per_group = 32L,
linear_tree = FALSE,
ifw = FALSE,
# fixed
objective = NULL,
device_type = "cpu",
tree_learner = "serial",
force_col_wise = TRUE
) {
# Validate / coerce arguments in signature order.
# `ifw`, `objective`, `device_type`, `tree_learner` and `force_col_wise` are passed
# through without a check here.
nrounds <- clean_posint(nrounds)
num_leaves <- clean_posint(num_leaves)
max_depth <- clean_int(max_depth)
check_float01inc(feature_fraction)
check_float01inc(subsample)
check_float0pos(lambda_l1)
check_float0pos(lambda_l2)
max_cat_threshold <- clean_posint(max_cat_threshold)
min_data_per_group <- clean_posint(min_data_per_group)
check_logical(linear_tree)
LightRFHyperparameters(
nrounds = nrounds,
num_leaves = num_leaves,
max_depth = max_depth,
feature_fraction = feature_fraction,
subsample = subsample,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
max_cat_threshold = max_cat_threshold,
min_data_per_group = min_data_per_group,
linear_tree = linear_tree,
ifw = ifw,
objective = objective,
device_type = device_type,
tree_learner = tree_learner,
force_col_wise = force_col_wise
)
} # /rtemis::setup_LightRF
# Test that all LightRF hyperparameters are set by setup_LightRF
# LightRF fixed hyperparameters are not editable.
# (only the tunable names are checked against the signature)
stopifnot(all(LightRF_tunable %in% names(formals(setup_LightRF))))
# %% LightGBMHyperparameters ----
# Names of LightGBM hyperparameters by role.
LightGBM_tunable <- c(
  "num_leaves",
  "max_depth",
  "learning_rate",
  "feature_fraction",
  "subsample",
  "subsample_freq",
  "lambda_l1",
  "lambda_l2",
  "max_cat_threshold",
  "min_data_per_group",
  "linear_tree",
  "ifw"
)
LightGBM_fixed <- c(
  "max_nrounds",
  "force_nrounds",
  "early_stopping_rounds",
  "objective",
  "device_type",
  "tree_learner",
  "force_col_wise"
)

#' @title LightGBMHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightGBM. The stored `nrounds` entry starts out equal to
#' `force_nrounds` (NULL when `force_nrounds` is NULL).
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightGBMHyperparameters <- new_class(
  name = "LightGBMHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    max_nrounds,
    force_nrounds,
    early_stopping_rounds,
    # tunable
    num_leaves,
    max_depth,
    learning_rate,
    feature_fraction,
    subsample,
    subsample_freq,
    lambda_l1,
    lambda_l2,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    ifw,
    objective,
    device_type,
    tree_learner,
    force_col_wise
  ) {
    lightgbm_values <- list(
      # nrounds mirrors force_nrounds: a number when forced, NULL otherwise
      nrounds = force_nrounds,
      max_nrounds = max_nrounds,
      force_nrounds = force_nrounds,
      early_stopping_rounds = early_stopping_rounds,
      num_leaves = num_leaves,
      max_depth = max_depth,
      learning_rate = learning_rate,
      feature_fraction = feature_fraction,
      subsample = subsample,
      subsample_freq = subsample_freq,
      lambda_l1 = lambda_l1,
      lambda_l2 = lambda_l2,
      max_cat_threshold = max_cat_threshold,
      min_data_per_group = min_data_per_group,
      linear_tree = linear_tree,
      ifw = ifw,
      objective = objective,
      device_type = device_type,
      tree_learner = tree_learner,
      force_col_wise = force_col_wise
    )
    base <- Hyperparameters(
      algorithm = "LightGBM",
      hyperparameters = lightgbm_values,
      tunable_hyperparameters = LightGBM_tunable,
      fixed_hyperparameters = LightGBM_fixed
    )
    new_object(base)
  }
) # /rtemis::LightGBMHyperparameters
# Update LightGBM hyperparameters by name, refresh the `tuned` status, and fill in
# `nrounds` from `force_nrounds` when `nrounds` is unset.
method(update, LightGBMHyperparameters) <- function(
  object,
  hyperparameters,
  tuned = NULL,
  ...
) {
  # Apply the new values on a local copy of the list, then store it back once.
  current <- object@hyperparameters
  for (nm in names(hyperparameters)) {
    current[[nm]] <- hyperparameters[[nm]]
  }
  object@hyperparameters <- current
  # Update tuned status
  object@tuned <- if (is.null(tuned)) get_tuned_status(object) else tuned
  # Update nrounds (e.g. in LightRuleFit)
  forced <- object@hyperparameters[["force_nrounds"]]
  if (is.null(object@hyperparameters[["nrounds"]]) && !is.null(forced)) {
    object@hyperparameters[["nrounds"]] <- forced
  }
  object
} # /update.LightGBMHyperparameters
# %% setup_LightGBM ----
# References:
# LightGBM parameters: https://lightgbm.readthedocs.io/en/latest/Parameters.html
#' Setup LightGBM Hyperparameters
#'
#' Setup hyperparameters for LightGBM training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param max_nrounds Positive integer: Maximum number of boosting rounds.
#' @param force_nrounds Positive integer: Use this many boosting rounds. Disable search for nrounds.
#' @param early_stopping_rounds Positive integer: Number of rounds without improvement to stop training.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param learning_rate (Tunable) Numeric: Learning rate.
#' @param feature_fraction (Tunable) Numeric: Fraction of features to use.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param subsample_freq (Tunable) Positive integer: Frequency of subsample.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#' @param device_type Character: "cpu" or "gpu".
#' @param tree_learner Character: "serial", "feature", "data", or "voting".
#' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building.
#'
#' @return LightGBMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightgbm_hyperparams <- setup_LightGBM(
#' max_nrounds = 500L,
#' learning_rate = c(0.001, 0.01, 0.05), ifw = TRUE
#' )
#' lightgbm_hyperparams
setup_LightGBM <- function(
# nrounds is not an argument here: the constructor stores force_nrounds as nrounds.
# When force_nrounds is NULL, nrounds is left NULL so that it can be auto-tuned with a
# value up to max_nrounds, using early_stopping_rounds.
max_nrounds = 1000L,
force_nrounds = NULL,
early_stopping_rounds = 10L,
# tunable
num_leaves = 8L,
max_depth = -1L,
learning_rate = 0.01,
feature_fraction = 1.0,
subsample = 1.0, # a.k.a. bagging_fraction {check:hyper}
subsample_freq = 1L,
lambda_l1 = 0,
lambda_l2 = 0,
max_cat_threshold = 32L,
min_data_per_group = 32L,
linear_tree = FALSE,
ifw = FALSE,
objective = NULL,
device_type = "cpu",
tree_learner = "serial",
force_col_wise = TRUE
) {
# clean_*() results replace the inputs; check_*() calls are used only for their
# side effect (their return value is discarded).
# NOTE(review): ifw, objective, device_type, tree_learner and force_col_wise are
# passed through without any check in this function.
max_nrounds <- clean_posint(max_nrounds)
force_nrounds <- clean_posint(force_nrounds)
early_stopping_rounds <- clean_posint(early_stopping_rounds)
num_leaves <- clean_posint(num_leaves)
# max_depth uses clean_int rather than clean_posint: its default is -1L
max_depth <- clean_int(max_depth)
check_floatpos1(learning_rate)
check_floatpos1(feature_fraction)
check_floatpos1(subsample)
subsample_freq <- clean_posint(subsample_freq)
check_float0pos(lambda_l1)
check_float0pos(lambda_l2)
max_cat_threshold <- clean_posint(max_cat_threshold)
min_data_per_group <- clean_posint(min_data_per_group)
check_logical(linear_tree)
# Hand the cleaned values to the S7 constructor
LightGBMHyperparameters(
max_nrounds = max_nrounds,
force_nrounds = force_nrounds,
early_stopping_rounds = early_stopping_rounds,
num_leaves = num_leaves,
max_depth = max_depth,
learning_rate = learning_rate,
feature_fraction = feature_fraction,
subsample = subsample,
subsample_freq = subsample_freq,
lambda_l1 = lambda_l1,
lambda_l2 = lambda_l2,
max_cat_threshold = max_cat_threshold,
min_data_per_group = min_data_per_group,
linear_tree = linear_tree,
ifw = ifw,
objective = objective,
device_type = device_type,
tree_learner = tree_learner,
force_col_wise = force_col_wise
)
} # /rtemis::setup_LightGBM
# Load-time check: every name listed in LightGBM_tunable and LightGBM_fixed must be a
# formal argument of setup_LightGBM; sourcing the file fails otherwise.
stopifnot(all(
c(LightGBM_tunable, LightGBM_fixed) %in% names(formals(setup_LightGBM))
))
method(get_hyperparams_need_tuning, LightGBMHyperparameters) <- function(x) {
  # Number of candidate values held by each tunable hyperparameter
  tunable_names <- x@tunable_hyperparameters
  n_values <- sapply(x@hyperparameters[tunable_names], length)
  # Keep only those with more than one value
  needs_tuning <- x@hyperparameters[tunable_names[n_values > 1]]
  # An unset nrounds is reported as well, as a NULL entry
  if (is.null(x[["nrounds"]])) {
    needs_tuning <- c(needs_tuning, list(nrounds = NULL))
  }
  needs_tuning
} # /get_hyperparams_need_tuning.LightGBMHyperparameters
# %% LightRuleFitHyperparameters ----
# Hyperparameters forwarded to the LightGBM step
LightRuleFit_lightgbm_params <- c(
  "nrounds", "num_leaves", "max_depth", "learning_rate",
  "subsample", "subsample_freq", "lambda_l1", "lambda_l2",
  "objective"
)
# Hyperparameters forwarded to the GLMNET step
LightRuleFit_glmnet_params <- c("alpha", "lambda")
# Tunable: every LightGBM parameter except `objective`, then `alpha` and the
# per-step IFW switches
LightRuleFit_tunable <- c(
  setdiff(LightRuleFit_lightgbm_params, "objective"),
  "alpha", "ifw_lightgbm", "ifw_glmnet"
)
LightRuleFit_fixed <- c("lambda", "objective")
#' @title LightRuleFitHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightRuleFit.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightRuleFitHyperparameters <- new_class(
  name = "LightRuleFitHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    nrounds,
    num_leaves,
    max_depth,
    learning_rate,
    subsample,
    subsample_freq,
    lambda_l1,
    lambda_l2,
    objective,
    ifw_lightgbm,
    # GLMNET
    alpha,
    lambda,
    ifw_glmnet,
    # IFW
    ifw
  ) {
    # LightGBM step
    lightgbm_values <- list(
      nrounds = nrounds,
      num_leaves = num_leaves,
      max_depth = max_depth,
      learning_rate = learning_rate,
      subsample = subsample,
      subsample_freq = subsample_freq,
      lambda_l1 = lambda_l1,
      lambda_l2 = lambda_l2,
      objective = objective,
      ifw_lightgbm = ifw_lightgbm
    )
    # GLMNET step
    glmnet_values <- list(
      alpha = alpha,
      lambda = lambda,
      ifw_glmnet = ifw_glmnet
    )
    # Single flat list, in the order: LightGBM, GLMNET, overall IFW
    new_object(
      Hyperparameters(
        algorithm = "LightRuleFit",
        hyperparameters = c(lightgbm_values, glmnet_values, list(ifw = ifw)),
        tunable_hyperparameters = LightRuleFit_tunable,
        fixed_hyperparameters = LightRuleFit_fixed
      )
    )
  }
) # /rtemis::LightRuleFitHyperparameters
# %% setup_LightRuleFit ----
#' Setup LightRuleFit Hyperparameters
#'
#' Setup hyperparameters for LightRuleFit training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param nrounds (Tunable) Positive integer: Number of boosting rounds.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param learning_rate (Tunable) Numeric: Learning rate.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param subsample_freq (Tunable) Positive integer: Frequency of subsample.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param objective Character: Objective function.
#' @param ifw_lightgbm (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the LightGBM
#' step.
#' @param alpha (Tunable) Numeric: Alpha for GLMNET.
#' @param lambda Numeric: Lambda for GLMNET.
#' @param ifw_glmnet (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the GLMNET step.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. This applies IFW
#' to both LightGBM and GLMNET.
#'
#' @return LightRuleFitHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightrulefit_hyperparams <- setup_LightRuleFit(nrounds = 300L, max_depth = 3L)
#' lightrulefit_hyperparams
setup_LightRuleFit <- function(
  nrounds = 200L,
  num_leaves = 32L,
  max_depth = 4L,
  learning_rate = 0.1,
  subsample = 0.666,
  subsample_freq = 1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  objective = NULL,
  ifw_lightgbm = FALSE,
  alpha = 1,
  lambda = NULL,
  ifw_glmnet = FALSE,
  ifw = FALSE
) {
  # LightGBM step: clean_*() results replace the inputs, check_*() calls are
  # used for their side effect only
  nrounds <- clean_posint(nrounds)
  num_leaves <- clean_posint(num_leaves)
  max_depth <- clean_int(max_depth)
  check_floatpos1(learning_rate)
  check_floatpos1(subsample)
  subsample_freq <- clean_posint(subsample_freq)
  check_inherits(lambda_l1, "numeric")
  check_inherits(lambda_l2, "numeric")
  # GLMNET step
  check_float01inc(alpha)
  check_inherits(lambda, "numeric")
  # IFW switches
  check_logical(ifw_lightgbm)
  check_logical(ifw_glmnet)
  check_logical(ifw)
  # If ifw, cannot have ifw_lightgbm or ifw_glmnet.
  # ifw_lightgbm and ifw_glmnet are tunable, so each may hold several candidate
  # values; any() keeps the condition length-one and flags a conflict as soon
  # as one candidate is TRUE.
  if (ifw) {
    if (any(ifw_lightgbm)) {
      cli::cli_abort("Cannot set ifw and ifw_lightgbm at the same time.")
    }
    if (any(ifw_glmnet)) {
      cli::cli_abort("Cannot set ifw and ifw_glmnet at the same time.")
    }
  }
  LightRuleFitHyperparameters(
    nrounds = nrounds,
    num_leaves = num_leaves,
    max_depth = max_depth,
    learning_rate = learning_rate,
    subsample = subsample,
    subsample_freq = subsample_freq,
    lambda_l1 = lambda_l1,
    lambda_l2 = lambda_l2,
    objective = objective,
    ifw_lightgbm = ifw_lightgbm,
    alpha = alpha,
    lambda = lambda,
    ifw_glmnet = ifw_glmnet,
    ifw = ifw
  )
} # /rtemis::setup_LightRuleFit
# %% IsotonicHyperparameters ----
# `ifw` is the only hyperparameter and the class registers it as tunable, so
# the module-level constant lists it too (it was previously empty and unused).
Isotonic_tunable <- "ifw"
Isotonic_fixed <- character()
#' @title IsotonicHyperparameters
#'
#' @description
#' Hyperparameters subclass for Isotonic Regression.
#'
#' @author EDG
#' @keywords internal
#' @noRd
IsotonicHyperparameters <- new_class(
  name = "IsotonicHyperparameters",
  parent = Hyperparameters,
  constructor = function(ifw) {
    new_object(
      Hyperparameters(
        algorithm = "Isotonic",
        hyperparameters = list(
          ifw = ifw
        ),
        # Same source of truth as the other *Hyperparameters classes
        tunable_hyperparameters = Isotonic_tunable,
        fixed_hyperparameters = Isotonic_fixed
      )
    )
  }
) # /rtemis::IsotonicHyperparameters
# %% setup_Isotonic ----
#' Setup Isotonic Hyperparameters
#'
#' Setup hyperparameters for Isotonic Regression.
#'
#' There are no algorithm-specific hyperparameters for this algorithm at this moment; only `ifw` can be set.
#'
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return IsotonicHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' isotonic_hyperparams <- setup_Isotonic(ifw = TRUE)
#' isotonic_hyperparams
setup_Isotonic <- function(ifw = FALSE) {
  # Validate `ifw` here, as the other setup_*() functions taking `ifw` do,
  # instead of passing an unchecked value on to the constructor
  check_logical(ifw)
  IsotonicHyperparameters(ifw = ifw)
} # /rtemis::setup_Isotonic
# %% SVMHyperparameters ----
#' @title SVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM.
#'
#' @author EDG
#' @keywords internal
#' @noRd
SVMHyperparameters <- new_class(
  name = "SVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    hyperparameters,
    tunable_hyperparameters,
    fixed_hyperparameters
  ) {
    # Generic SVM container: the caller supplies the values and both name
    # sets; only the algorithm label is fixed here
    svm_parent <- Hyperparameters(
      algorithm = "SVM",
      hyperparameters = hyperparameters,
      tunable_hyperparameters = tunable_hyperparameters,
      fixed_hyperparameters = fixed_hyperparameters
    )
    new_object(svm_parent)
  } # /constructor
) # /rtemis::SVMHyperparameters
# %% LinearSVMHyperparameters ----
# Names of the tunable / fixed LinearSVM hyperparameters
LinearSVM_tunable <- c("cost", "ifw")
LinearSVM_fixed <- character()
#' @title LinearSVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM with linear kernel.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LinearSVMHyperparameters <- new_class(
  name = "LinearSVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(cost, ifw) {
    # The kernel is pinned to "linear"; only cost and ifw come from the caller
    linear_values <- list(kernel = "linear", cost = cost, ifw = ifw)
    new_object(
      Hyperparameters(
        algorithm = "LinearSVM",
        hyperparameters = linear_values,
        tunable_hyperparameters = LinearSVM_tunable,
        fixed_hyperparameters = LinearSVM_fixed
      )
    )
  } # /constructor
) # /rtemis::LinearSVMHyperparameters
# %% setup_LinearSVM ----
#' Setup LinearSVM Hyperparameters
#'
#' Setup hyperparameters for LinearSVM training.
#'
#' Get more information from [e1071::svm].
#' @param cost (Tunable) Numeric: Cost of constraints violation.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return LinearSVMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' linear_svm_hyperparams <- setup_LinearSVM(cost = 0.5, ifw = TRUE)
#' linear_svm_hyperparams
setup_LinearSVM <- function(
cost = 1,
ifw = FALSE
) {
# Both checks are called for their side effect only; the inputs are passed on unchanged
check_inherits(cost, "numeric")
check_logical(ifw)
LinearSVMHyperparameters(
cost = cost,
ifw = ifw
)
} # /setup_LinearSVM
# Load-time check: every name listed in LinearSVM_tunable and LinearSVM_fixed must be a
# formal argument of setup_LinearSVM
stopifnot(all(
c(LinearSVM_tunable, LinearSVM_fixed) %in% names(formals(setup_LinearSVM))
))
# %% RadialSVMHyperparameters ----
# Names of the tunable / fixed RadialSVM hyperparameters
RadialSVM_tunable <- c("cost", "gamma", "ifw")
RadialSVM_fixed <- character()
#' @title RadialSVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM with radial kernel.
#'
#' @author EDG
#' @keywords internal
#' @noRd
RadialSVMHyperparameters <- new_class(
  name = "RadialSVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(cost, gamma, ifw) {
    # The kernel is pinned to "radial"; cost, gamma and ifw come from the caller
    radial_values <- list(
      kernel = "radial",
      cost = cost,
      gamma = gamma,
      ifw = ifw
    )
    new_object(
      Hyperparameters(
        algorithm = "RadialSVM",
        hyperparameters = radial_values,
        tunable_hyperparameters = RadialSVM_tunable,
        fixed_hyperparameters = RadialSVM_fixed
      )
    )
  } # /constructor
) # /rtemis::RadialSVMHyperparameters
# %% setup_RadialSVM ----
#' Setup RadialSVM Hyperparameters
#'
#' Setup hyperparameters for RadialSVM training.
#'
#' Get more information from [e1071::svm].
#'
#' @param cost (Tunable) Numeric: Cost of constraints violation.
#' @param gamma (Tunable) Numeric: Kernel coefficient.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return RadialSVMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' radial_svm_hyperparams <- setup_RadialSVM(cost = 10, gamma = 0.1, ifw = TRUE)
#' radial_svm_hyperparams
setup_RadialSVM <- function(
cost = 1,
gamma = 0.01,
ifw = FALSE
) {
# All checks are called for their side effect only; the inputs are passed on unchanged
check_inherits(cost, "numeric")
check_inherits(gamma, "numeric")
check_logical(ifw)
RadialSVMHyperparameters(
cost = cost,
gamma = gamma,
ifw = ifw
)
} # /setup_RadialSVM
# Load-time check: every name listed in RadialSVM_tunable and RadialSVM_fixed must be a
# formal argument of setup_RadialSVM
stopifnot(all(
c(RadialSVM_tunable, RadialSVM_fixed) %in% names(formals(setup_RadialSVM))
))
# %% TabNetHyperparameters ----
# Tunable TabNet hyperparameter names (order is significant and preserved)
tabnet_tunable <- c(
  "batch_size", "penalty", "clip_value", "loss", "epochs", "drop_last",
  "decision_width", "attention_width", "num_steps", "feature_reusage",
  "mask_type", "virtual_batch_size", "valid_split",
  "learn_rate", "optimizer", "lr_scheduler", "lr_decay", "step_size",
  "checkpoint_epochs", "cat_emb_dim",
  "num_independent", "num_shared",
  "num_independent_decoder", "num_shared_decoder",
  "momentum", "pretraining_ratio", "importance_sample_size",
  "early_stopping_monitor", "early_stopping_tolerance",
  "early_stopping_patience",
  "ifw"
)
# Fixed TabNet hyperparameter names
tabnet_fixed <- c("device", "num_workers", "skip_importance")
#' @title TabNetHyperparameters
#'
#' @description
#' Hyperparameters subclass for TabNet.
#'
#' @author EDG
#' @keywords internal
#' @noRd
TabNetHyperparameters <- new_class(
  name = "TabNetHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    batch_size,
    penalty,
    clip_value,
    loss,
    epochs,
    drop_last,
    decision_width,
    attention_width,
    num_steps,
    feature_reusage,
    mask_type,
    virtual_batch_size,
    valid_split,
    learn_rate,
    optimizer,
    lr_scheduler,
    lr_decay,
    step_size,
    checkpoint_epochs,
    cat_emb_dim,
    num_independent,
    num_shared,
    num_independent_decoder,
    num_shared_decoder,
    momentum,
    pretraining_ratio,
    device,
    importance_sample_size,
    early_stopping_monitor,
    early_stopping_tolerance,
    early_stopping_patience,
    num_workers,
    skip_importance,
    ifw
  ) {
    # One entry per constructor argument, in argument order
    tabnet_values <- list(
      batch_size = batch_size,
      penalty = penalty,
      clip_value = clip_value,
      loss = loss,
      epochs = epochs,
      drop_last = drop_last,
      decision_width = decision_width,
      attention_width = attention_width,
      num_steps = num_steps,
      feature_reusage = feature_reusage,
      mask_type = mask_type,
      virtual_batch_size = virtual_batch_size,
      valid_split = valid_split,
      learn_rate = learn_rate,
      optimizer = optimizer,
      lr_scheduler = lr_scheduler,
      lr_decay = lr_decay,
      step_size = step_size,
      checkpoint_epochs = checkpoint_epochs,
      cat_emb_dim = cat_emb_dim,
      num_independent = num_independent,
      num_shared = num_shared,
      num_independent_decoder = num_independent_decoder,
      num_shared_decoder = num_shared_decoder,
      momentum = momentum,
      pretraining_ratio = pretraining_ratio,
      device = device,
      importance_sample_size = importance_sample_size,
      early_stopping_monitor = early_stopping_monitor,
      early_stopping_tolerance = early_stopping_tolerance,
      early_stopping_patience = early_stopping_patience,
      num_workers = num_workers,
      skip_importance = skip_importance,
      ifw = ifw
    )
    new_object(
      Hyperparameters(
        algorithm = "TabNet",
        hyperparameters = tabnet_values,
        tunable_hyperparameters = tabnet_tunable,
        fixed_hyperparameters = tabnet_fixed
      )
    )
  } # /constructor
) # /rtemis::TabNetHyperparameters
# %% setup_TabNet ----
#' Setup TabNet Hyperparameters
#'
#' Setup hyperparameters for TabNet training.
#'
#' Get more information from [tabnet::tabnet_config].
#'
#' @param batch_size (Tunable) Positive integer: Batch size.
#' @param penalty (Tunable) Numeric: Regularization penalty.
#' @param clip_value Numeric: Clip value.
#' @param loss Character: Loss function.
#' @param epochs (Tunable) Positive integer: Number of epochs.
#' @param drop_last Logical: If TRUE, drop last batch.
#' @param decision_width (Tunable) Positive integer: Decision width.
#' @param attention_width (Tunable) Positive integer: Attention width.
#' @param num_steps (Tunable) Positive integer: Number of steps.
#' @param feature_reusage (Tunable) Numeric: Feature reusage.
#' @param mask_type Character: Mask type.
#' @param virtual_batch_size (Tunable) Positive integer: Virtual batch size.
#' @param valid_split Numeric: Validation split.
#' @param learn_rate (Tunable) Numeric: Learning rate.
#' @param optimizer Character or torch function: Optimizer.
#' @param lr_scheduler Character or torch function: "step", "reduce_on_plateau".
#' @param lr_decay Numeric: Learning rate decay.
#' @param step_size Positive integer: Step size.
#' @param checkpoint_epochs (Tunable) Positive integer: Checkpoint epochs.
#' @param cat_emb_dim (Tunable) Positive integer: Categorical embedding dimension.
#' @param num_independent (Tunable) Positive integer: Number of independent Gated Linear Units (GLU)
#' at each step of the encoder.
#' @param num_shared (Tunable) Positive integer: Number of shared Gated Linear Units (GLU) at each
#' step of the encoder.
#' @param num_independent_decoder (Tunable) Positive integer: Number of independent GLU layers for
#' pretraining.
#' @param num_shared_decoder (Tunable) Positive integer: Number of shared GLU layers for
#' pretraining.
#' @param momentum (Tunable) Numeric: Momentum.
#' @param pretraining_ratio (Tunable) Numeric: Pretraining ratio.
#' @param device Character: Device "cpu" or "cuda".
#' @param importance_sample_size Positive integer: Importance sample size.
#' @param early_stopping_monitor Character: Early stopping monitor. "valid_loss", "train_loss",
#' "auto".
#' @param early_stopping_tolerance Numeric: Minimum relative improvement to reset the patience
#' counter.
#' @param early_stopping_patience Positive integer: Number of epochs without improving before
#' stopping.
#' @param num_workers Positive integer: Number of subprocesses for data loading.
#' @param skip_importance Logical: If TRUE, skip importance calculation.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return TabNetHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' tabnet_hyperparams <- setup_TabNet(epochs = 100L, learn_rate = 0.01)
#' tabnet_hyperparams
setup_TabNet <- function(
  batch_size = 1024^2,
  penalty = 0.001,
  clip_value = NULL,
  loss = "auto",
  epochs = 50L,
  drop_last = FALSE,
  decision_width = NULL,
  attention_width = NULL,
  num_steps = 3L,
  feature_reusage = 1.3,
  mask_type = "sparsemax",
  virtual_batch_size = 256^2,
  valid_split = 0,
  learn_rate = 0.02,
  optimizer = "adam",
  lr_scheduler = NULL,
  lr_decay = 0.1,
  step_size = 30,
  checkpoint_epochs = 10L,
  cat_emb_dim = 1L,
  num_independent = 2L,
  num_shared = 2L,
  num_independent_decoder = 1L,
  num_shared_decoder = 1L,
  momentum = 0.02,
  pretraining_ratio = 0.5,
  device = "auto",
  importance_sample_size = NULL,
  early_stopping_monitor = "auto",
  early_stopping_tolerance = 0,
  early_stopping_patience = 0,
  num_workers = 0L,
  skip_importance = FALSE,
  ifw = FALSE
) {
  # Every argument is forwarded unchanged; no input checks are done here
  tabnet_hyperparameters <- TabNetHyperparameters(
    # Batching, loss and training length
    batch_size = batch_size,
    penalty = penalty,
    clip_value = clip_value,
    loss = loss,
    epochs = epochs,
    drop_last = drop_last,
    # Network shape
    decision_width = decision_width,
    attention_width = attention_width,
    num_steps = num_steps,
    feature_reusage = feature_reusage,
    mask_type = mask_type,
    virtual_batch_size = virtual_batch_size,
    valid_split = valid_split,
    # Optimization
    learn_rate = learn_rate,
    optimizer = optimizer,
    lr_scheduler = lr_scheduler,
    lr_decay = lr_decay,
    step_size = step_size,
    checkpoint_epochs = checkpoint_epochs,
    # Embeddings and GLU layers
    cat_emb_dim = cat_emb_dim,
    num_independent = num_independent,
    num_shared = num_shared,
    num_independent_decoder = num_independent_decoder,
    num_shared_decoder = num_shared_decoder,
    momentum = momentum,
    pretraining_ratio = pretraining_ratio,
    # Runtime, importance and early stopping
    device = device,
    importance_sample_size = importance_sample_size,
    early_stopping_monitor = early_stopping_monitor,
    early_stopping_tolerance = early_stopping_tolerance,
    early_stopping_patience = early_stopping_patience,
    num_workers = num_workers,
    skip_importance = skip_importance,
    ifw = ifw
  )
  tabnet_hyperparameters
} # /setup_TabNet
# Load-time check: every name in tabnet_tunable and tabnet_fixed must be a
# formal argument of setup_TabNet
stopifnot(all(
  c(tabnet_tunable, tabnet_fixed) %in% names(formals(setup_TabNet))
))
# Build the argument list for tabnet::tabnet_config() from a
# TabNetHyperparameters object and call it.
get_tabnet_config <- function(hyperparameters) {
  check_is_S7(hyperparameters, TabNetHyperparameters)
  config_args <- hyperparameters@hyperparameters
  # `ifw` is dropped before the call; every other entry is passed by name
  config_args[["ifw"]] <- NULL
  do.call(tabnet::tabnet_config, config_args)
} # /get_tabnet_config
# %% RangerHyperparameters ----
# Tunable Ranger hyperparameter names (order is significant and preserved)
ranger_tunable <- c(
  "num_trees", "mtry", "min_node_size", "max_depth",
  "sample_fraction", "replace",
  "splitrule", "num_random_splits", "alpha", "minprop",
  "regularization_factor",
  "ifw"
)
# Fixed Ranger hyperparameter names
ranger_fixed <- c(
  "importance", "write_forest", "probability", "min_bucket",
  "case_weights", # set by train
  "class_weights", # set by train
  "poisson_tau", "split_select_weights", "always_split_variables",
  "respect_unordered_factors", "scale_permutation_importance",
  "local_importance", "regularization_usedepth",
  "keep_inbag", "inbag", "holdout", "quantreg", "time_interest",
  "oob_error", "save_memory", "verbose", "node_stats", "seed", "na_action"
)
#' @title RangerHyperparameters
#'
#' @description
#' Hyperparameters subclass for Ranger Random Forest.
#'
#' @author EDG
#' @keywords internal
#' @noRd
RangerHyperparameters <- new_class(
  name = "RangerHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    num_trees,
    mtry,
    importance,
    write_forest,
    probability,
    min_node_size,
    min_bucket,
    max_depth,
    replace,
    sample_fraction,
    case_weights,
    class_weights,
    splitrule,
    num_random_splits,
    alpha,
    minprop,
    poisson_tau,
    split_select_weights,
    always_split_variables,
    respect_unordered_factors,
    scale_permutation_importance,
    local_importance,
    regularization_factor,
    regularization_usedepth,
    keep_inbag,
    inbag,
    holdout,
    quantreg,
    time_interest,
    oob_error,
    save_memory,
    verbose,
    node_stats,
    seed,
    na_action,
    ifw
  ) {
    # One entry per constructor argument, in argument order
    ranger_values <- list(
      num_trees = num_trees,
      mtry = mtry,
      importance = importance,
      write_forest = write_forest,
      probability = probability,
      min_node_size = min_node_size,
      min_bucket = min_bucket,
      max_depth = max_depth,
      replace = replace,
      sample_fraction = sample_fraction,
      case_weights = case_weights,
      class_weights = class_weights,
      splitrule = splitrule,
      num_random_splits = num_random_splits,
      alpha = alpha,
      minprop = minprop,
      poisson_tau = poisson_tau,
      split_select_weights = split_select_weights,
      always_split_variables = always_split_variables,
      respect_unordered_factors = respect_unordered_factors,
      scale_permutation_importance = scale_permutation_importance,
      local_importance = local_importance,
      regularization_factor = regularization_factor,
      regularization_usedepth = regularization_usedepth,
      keep_inbag = keep_inbag,
      inbag = inbag,
      holdout = holdout,
      quantreg = quantreg,
      time_interest = time_interest,
      oob_error = oob_error,
      save_memory = save_memory,
      verbose = verbose,
      node_stats = node_stats,
      seed = seed,
      na_action = na_action,
      ifw = ifw
    )
    new_object(
      Hyperparameters(
        algorithm = "Ranger",
        hyperparameters = ranger_values,
        tunable_hyperparameters = ranger_tunable,
        fixed_hyperparameters = ranger_fixed
      )
    )
  } # /constructor
) # /rtemis::RangerHyperparameters
# %% setup_Ranger ----
#' Setup Ranger Hyperparameters
#'
#' Setup hyperparameters for Ranger Random Forest training.
#'
#' Get more information from [ranger::ranger].
#'
#' @param num_trees (Tunable) Positive integer: Number of trees.
#' @param mtry (Tunable) Positive integer: Number of features to consider at each split.
#' @param importance Character: Variable importance mode. "none", "impurity", "impurity_corrected", "permutation".
#' The "impurity" measure is the Gini index for classification, the variance of the responses for regression.
#' @param write_forest Logical: Save ranger.forest object, required for prediction. Set to FALSE to reduce memory usage if no prediction intended.
#' @param probability Logical: Grow a probability forest as in Malley et al. (2012). For classification only.
#' @param min_node_size (Tunable) Positive integer: Minimal node size. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability.
#' @param min_bucket Positive integer: Minimal number of samples in a terminal node. Only for survival. Deprecated in favor of min_node_size.
#' @param max_depth (Tunable) Positive integer: Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree).
#' @param replace Logical: Sample with replacement.
#' @param sample_fraction (Tunable) Numeric: Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement.
#' @param case_weights Numeric vector: Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees.
#' @param class_weights Numeric vector: Weights for the outcome classes for classification. Vector of the same length as the number of classes, with names corresponding to the class labels.
#' @param splitrule (Tunable) Character: Splitting rule. For classification: "gini", "extratrees", "hellinger". For regression: "variance", "extratrees", "maxstat", "beta". For survival: "logrank", "extratrees", "C", "maxstat".
#' @param num_random_splits (Tunable) Positive integer: For "extratrees" splitrule: Number of random splits to consider for each candidate splitting variable.
#' @param alpha (Tunable) Numeric: For "maxstat" splitrule: significance threshold to allow splitting.
#' @param minprop (Tunable) Numeric: For "maxstat" splitrule: lower quantile of covariate distribution to be considered for splitting.
#' @param poisson_tau Numeric: For "poisson" regression splitrule: tau parameter for Poisson regression.
#' @param split_select_weights Numeric vector: Numeric vector with weights between 0 and 1, representing the probability to select variables for splitting. Alternatively, a list of size num_trees, with one weight vector per tree.
#' @param always_split_variables Character vector: Character vector with variable names to be always selected in addition to the mtry variables tried for splitting.
#' @param respect_unordered_factors Character or logical: Handling of unordered factor covariates. For "partition" all 2^(k-1)-1 possible partitions are considered for splitting, where k is the number of factor levels. For "ignore", all factor levels are ordered by their first occurrence in the data. For "order", all factor levels are ordered by their average response. TRUE corresponds to "partition" for the randomForest package compatibility.
#' @param scale_permutation_importance Logical: Scale permutation importance by standard error as in (Breiman 2001). Only applicable if permutation variable importance mode selected.
#' @param local_importance Logical: For permutation variable importance, use local importance as in Breiman (2001) and Liaw & Wiener (2002).
#' @param regularization_factor (Tunable) Numeric: Regularization factor. Penalize variables with many split points. Requires splitrule = "variance".
#' @param regularization_usedepth Logical: Use regularization factor with node depth. Requires regularization_factor.
#' @param keep_inbag Logical: Save how often observations are in-bag in each tree. These will be used for (local) variable importance if inbag.counts in predict() is NULL.
#' @param inbag List: Manually set observations per tree. List of size num_trees, containing inbag counts for each observation. Can be used for stratified sampling.
#' @param holdout Logical: Hold-out mode. Hold-out all samples with case weight 0 and use these for variable importance and prediction error.
#' @param quantreg Logical: Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). For regression only. Set keep_inbag = TRUE to prepare out-of-bag quantile prediction.
#' @param time_interest Numeric: Time points of interest (survival only). NULL (the default) uses all observed time points; otherwise a vector of time points, or a single number giving how many time points to use (grid over observed time points).
#' @param oob_error Logical: Compute OOB prediction error. Set to FALSE to save computation time if only the forest is needed.
#' @param save_memory Logical: Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.
#' @param verbose Logical: Show computation status and estimated runtime.
#' @param node_stats Logical: Save additional node statistics. Only terminal nodes for now.
#' @param seed Positive integer: Random seed. Default is NULL, which generates the seed from R. Set to 0 to ignore the R seed.
#' @param na_action Character: Action to take if the data contains missing values. "na.learn" uses observations with missing values in splitting, treating missing values as a separate category.
#' @param ifw Logical: Inverse Frequency Weighting for classification. If TRUE, class weights are set inversely proportional to the class frequencies.
#'
#' @return RangerHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ranger_hyperparams <- setup_Ranger(num_trees = 1000L, ifw = FALSE)
#' ranger_hyperparams
setup_Ranger <- function(
  num_trees = 500,
  mtry = NULL,
  importance = "impurity",
  write_forest = TRUE,
  probability = FALSE,
  min_node_size = NULL,
  min_bucket = NULL,
  max_depth = NULL,
  replace = TRUE,
  sample_fraction = ifelse(replace, 1, 0.632),
  case_weights = NULL,
  class_weights = NULL,
  splitrule = NULL,
  num_random_splits = 1,
  alpha = 0.5,
  minprop = 0.1,
  poisson_tau = 1,
  split_select_weights = NULL,
  always_split_variables = NULL,
  respect_unordered_factors = NULL,
  scale_permutation_importance = FALSE,
  local_importance = FALSE,
  regularization_factor = 1,
  regularization_usedepth = FALSE,
  keep_inbag = FALSE,
  inbag = NULL,
  holdout = FALSE,
  quantreg = FALSE,
  time_interest = NULL,
  oob_error = TRUE,
  save_memory = FALSE,
  verbose = TRUE,
  node_stats = FALSE,
  seed = NULL,
  na_action = "na.learn",
  ifw = FALSE
) {
  # clean_*() results replace the inputs; check_*() calls are used for their
  # side effect only. The order of checks is unchanged.
  num_trees <- clean_posint(num_trees)
  mtry <- clean_posint(mtry)
  check_inherits(importance, "character")
  check_inherits(write_forest, "logical")
  check_inherits(probability, "logical")
  min_node_size <- clean_posint(min_node_size)
  min_bucket <- clean_posint(min_bucket)
  # NOTE(review): the documentation allows max_depth = 0 (unlimited depth);
  # confirm that clean_posint() accepts 0.
  max_depth <- clean_posint(max_depth)
  check_inherits(replace, "logical")
  check_float01inc(sample_fraction)
  check_inherits(case_weights, "numeric")
  check_inherits(class_weights, "numeric")
  check_inherits(splitrule, "character")
  num_random_splits <- clean_posint(num_random_splits)
  check_float01inc(alpha)
  check_float01inc(minprop)
  check_inherits(poisson_tau, "numeric")
  # Documented as a numeric vector or a list with one weight vector per tree:
  # a list is accepted as is, anything else must still be numeric (or NULL).
  if (!is.list(split_select_weights)) {
    check_inherits(split_select_weights, "numeric")
  }
  check_inherits(always_split_variables, "character")
  # Documented as character ("partition", "ignore", "order") or logical:
  # character input is accepted as is, anything else must still be logical
  # (or NULL).
  if (!is.character(respect_unordered_factors)) {
    check_inherits(respect_unordered_factors, "logical")
  }
  check_inherits(scale_permutation_importance, "logical")
  check_inherits(local_importance, "logical")
  check_inherits(regularization_factor, "numeric")
  check_inherits(regularization_usedepth, "logical")
  check_inherits(keep_inbag, "logical")
  check_inherits(inbag, "list")
  check_inherits(holdout, "logical")
  check_inherits(quantreg, "logical")
  check_inherits(time_interest, "numeric")
  check_inherits(oob_error, "logical")
  check_inherits(save_memory, "logical")
  check_inherits(verbose, "logical")
  check_inherits(node_stats, "logical")
  check_inherits(seed, "numeric")
  check_inherits(na_action, "character")
  check_logical(ifw)
  RangerHyperparameters(
    num_trees = num_trees,
    mtry = mtry,
    importance = importance,
    write_forest = write_forest,
    probability = probability,
    min_node_size = min_node_size,
    min_bucket = min_bucket,
    max_depth = max_depth,
    replace = replace,
    sample_fraction = sample_fraction,
    case_weights = case_weights,
    class_weights = class_weights,
    splitrule = splitrule,
    num_random_splits = num_random_splits,
    alpha = alpha,
    minprop = minprop,
    poisson_tau = poisson_tau,
    split_select_weights = split_select_weights,
    always_split_variables = always_split_variables,
    respect_unordered_factors = respect_unordered_factors,
    scale_permutation_importance = scale_permutation_importance,
    local_importance = local_importance,
    regularization_factor = regularization_factor,
    regularization_usedepth = regularization_usedepth,
    keep_inbag = keep_inbag,
    inbag = inbag,
    holdout = holdout,
    quantreg = quantreg,
    time_interest = time_interest,
    oob_error = oob_error,
    save_memory = save_memory,
    verbose = verbose,
    node_stats = node_stats,
    seed = seed,
    na_action = na_action,
    ifw = ifw
  )
} # /setup_Ranger
# Load-time check: every name in ranger_tunable and ranger_fixed must be a
# formal argument of setup_Ranger
stopifnot(all(
  c(ranger_tunable, ranger_fixed) %in% names(formals(setup_Ranger))
))
# %% .list_to_Hyperparameters ----
#' Convert a list to a Hyperparameters object
#'
#' Internal function used by `rtemis.server` to reconstruct a `Hyperparameters`
#' object from a wire-format list. Not intended for direct use by end users.
#'
#' @param x Named list with two elements:
#' \describe{
#' \item{`algorithm`}{Character: algorithm name, e.g. `"GLM"`, `"RF"`.}
#' \item{`hyperparameters`}{Named list of hyperparameter name-value pairs
#' passed to the corresponding `setup_<algorithm>()` function.}
#' }
#'
#' @return A `Hyperparameters` object as returned by `setup_<algorithm>()`.
#'
#' @author EDG
#' @keywords internal
#' @export
.list_to_Hyperparameters <- function(x) {
  # Resolve the setup function from the algorithm name.
  # The name must be a single, non-missing string before it is used in a lookup.
  algorithm <- x[["algorithm"]]
  if (!is.character(algorithm) || length(algorithm) != 1L || is.na(algorithm)) {
    cli::cli_abort("`algorithm` must be a single character string.")
  }
  fn <- paste0("setup_", algorithm)
  if (!exists(fn, mode = "function")) {
    cli::cli_abort("Invalid algorithm: {.val {algorithm}}.")
  }
  # A missing `hyperparameters` entry means "use the setup defaults";
  # as.list() turns NULL into an empty list, which do.call() requires.
  args <- as.list(x[["hyperparameters"]])
  # Keep only arguments that are in the setup function
  setup_formals <- names(formals(get(fn, mode = "function")))
  args <- args[names(args) %in% setup_formals]
  # Call by name so that error messages show the setup function's name
  do.call(fn, args)
}
================================================
FILE: R/03_Metrics.R
================================================
# S7_Metrics.R
# ::rtemis::
# 2025- EDG rtemis.org
# %% Metrics ----
#' @title Metrics
#'
#' @description
#' Superclass for performance metrics objects. `RegressionMetrics` and
#' `ClassificationMetrics` use it as their parent class.
#'
#' @field sample Character or NULL: Sample name.
#' @field metrics List or data.frame: Metrics.
#'
#' @author EDG
#' @noRd
Metrics <- new_class(
name = "Metrics",
properties = list(
sample = class_character | NULL,
metrics = class_list | class_data.frame
)
) # /rtemis::Metrics
# %% `$`.Metrics ----
# `$` on a Metrics object looks up `name` inside its `metrics` property
method(`$`, Metrics) <- function(x, name) {
  stored <- x@metrics
  stored[[name]]
}
# %% `.DollarNames`.Metrics ----
# `$` tab-completion: offer the names of Metrics@metrics that match `pattern`
method(`.DollarNames`, Metrics) <- function(x, pattern = "") {
  grep(pattern, names(x@metrics), value = TRUE)
}
# %% `[[`.Metrics ----
# `[[` on a Metrics object looks up `name` inside its `metrics` property
method(`[[`, Metrics) <- function(x, name) {
  stored <- x@metrics
  stored[[name]]
}
# %% RegressionMetrics ----
#' @title RegressionMetrics
#'
#' @description
#' `Metrics` subclass for regression models. The constructor collects the four
#' metric values into a data.frame (columns `MAE`, `MSE`, `RMSE`, `Rsq`) that is
#' stored in the `metrics` property.
#'
#' @author EDG
#' @noRd
RegressionMetrics <- new_class(
  name = "RegressionMetrics",
  parent = Metrics,
  # properties = list(
  #   MAE = class_numeric,
  #   MSE = class_numeric,
  #   RMSE = class_numeric,
  #   Rsq = class_numeric
  # ),
  constructor = function(MAE, MSE, RMSE, Rsq, sample = NULL) {
    metrics_df <- data.frame(MAE = MAE, MSE = MSE, RMSE = RMSE, Rsq = Rsq)
    parent_obj <- Metrics(sample = sample, metrics = metrics_df)
    new_object(parent_obj)
  }
) # /rtemis::RegressionMetrics
# %% repr.RegressionMetrics ----
# Build the printable string for a RegressionMetrics object: a title line
# (prefixed with the sample name when one is set) followed by the metrics.
method(repr, RegressionMetrics) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  title <- "Regression Metrics"
  if (!is.null(x@sample)) {
    title <- paste(x@sample, title)
  }
  header <- repr_S7name(title, pad = pad, output_type = output_type)
  # Metrics are listed two columns further in than the title
  listing <- repr_ls(
    x@metrics,
    print_class = FALSE,
    print_df = TRUE,
    pad = pad + 2L,
    output_type = output_type
  )
  paste0(header, listing)
} # /rtemis::repr.RegressionMetrics
# %% print.RegressionMetrics ----
# Write the repr() string to the console and return `x` invisibly
method(print, RegressionMetrics) <- function(
  x,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  rendered <- repr(x, pad = pad, output_type = output_type)
  cat(rendered)
  invisible(x)
} # /rtemis::print.RegressionMetrics
# %% ClassificationMetrics ----
#' @title ClassificationMetrics
#'
#' @description
#' `Metrics` subclass for classification models. The constructor stores its
#' four inputs as a named list (`Confusion_Matrix`, `Overall`, `Class`,
#' `Positive_Class`) in the `metrics` property.
#'
#' @author EDG
#' @keywords internal
#' @noRd
ClassificationMetrics <- new_class(
  name = "ClassificationMetrics",
  parent = Metrics,
  constructor = function(
    Confusion_Matrix,
    Overall,
    Class,
    Positive_Class,
    sample = NULL
  ) {
    metrics_list <- list(
      Confusion_Matrix = Confusion_Matrix,
      Overall = Overall,
      Class = Class,
      Positive_Class = Positive_Class
    )
    parent_obj <- Metrics(sample = sample, metrics = metrics_list)
    new_object(parent_obj)
  }
) # /rtemis::ClassificationMetrics
# %% repr.ClassificationMetrics ----
# Build the printable string for a ClassificationMetrics object: title,
# confusion matrix, overall metrics, then either the per-class metrics (when
# Positive_Class is NA) or the positive class label.
method(repr, ClassificationMetrics) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL,
  ...
) {
  output_type <- get_output_type(output_type)
  title <- "Classification Metrics"
  if (!is.null(x@sample)) {
    title <- paste(x@sample, title)
  }
  header <- repr_S7name(title, pad = pad, output_type = output_type)

  # Confusion Matrix
  # NOTE(review): the layout constants 17 and 9 are undocumented; the table
  # padding shrinks as the widest column name grows beyond 9 characters.
  widest_name <- max(nchar(colnames(x@metrics[["Confusion_Matrix"]])), 9L)
  tblpad <- 17L - widest_name + pad
  cm_txt <- show_table(
    x[["Confusion_Matrix"]],
    pad = tblpad,
    output_type = output_type
  )

  # Overall metrics
  overall_txt <- show_df(
    x@metrics[["Overall"]],
    pad = pad,
    transpose = TRUE,
    ddSci_dp = decimal_places,
    justify = "left",
    spacing = 2L,
    output_type = output_type
  )

  # Per-class metrics, or the positive class label
  tail_txt <- if (is.na(x@metrics[["Positive_Class"]])) {
    show_df(
      x@metrics[["Class"]],
      pad = pad,
      transpose = TRUE,
      ddSci_dp = decimal_places,
      justify = "left",
      spacing = 2,
      output_type = output_type
    )
  } else {
    paste0(
      "\n Positive Class ",
      fmt(
        x@metrics[["Positive_Class"]],
        col = highlight_col,
        bold = TRUE,
        output_type = output_type
      ),
      "\n"
    )
  }
  paste0(header, cm_txt, "\n", overall_txt, tail_txt)
} # /rtemis::repr.ClassificationMetrics
# %% print.ClassificationMetrics ----
# Write the repr() string to the console and return `x` invisibly
method(print, ClassificationMetrics) <- function(
  x,
  decimal_places = 3,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  rendered <- repr(
    x,
    decimal_places = decimal_places,
    pad = pad,
    output_type = output_type
  )
  cat(rendered)
  invisible(x)
} # /rtemis::print.ClassificationMetrics
# %% MetricsRes ----
#' @title MetricsRes
#'
#' @description
#' Superclass for metrics collected across resamples. `RegressionMetricsRes` and
#' `ClassificationMetricsRes` use it as their parent class.
#'
#' @field sample Character or NULL: Sample name.
#' @field res_metrics List: Per-resample metrics objects.
#' @field mean_metrics data.frame: Mean of each metric across resamples.
#' @field sd_metrics data.frame: Standard deviation of each metric across resamples.
#'
#' @author EDG
#' @noRd
MetricsRes <- new_class(
name = "MetricsRes",
properties = list(
sample = class_character | NULL,
res_metrics = class_list,
mean_metrics = class_data.frame,
sd_metrics = class_data.frame
)
) # /rtemis::MetricsRes
# %% repr.MetricsRes ----
# Build the printable string for resampled metrics: title, a note line, then
# one "mean (sd)" entry per metric.
method(repr, MetricsRes) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  # Anything that is not a RegressionMetricsRes is labelled as classification
  type <- "Classification"
  if (S7_inherits(x, RegressionMetricsRes)) {
    type <- "Regression"
  }
  header <- repr_S7name(
    paste("Resampled", type, x@sample, "Metrics"),
    pad = pad,
    output_type = output_type
  )
  note <- italic(
    " Showing mean (sd) across resamples.\n",
    output_type = output_type
  )
  # Format the i-th metric as "mean (sd)", with the sd part grayed out
  mean_sd <- function(i) {
    sd_part <- paste0(" (", ddSci(x@sd_metrics[[i]], decimal_places), ")")
    paste0(
      ddSci(x@mean_metrics[[i]], decimal_places),
      gray(sd_part, output_type = output_type)
    )
  }
  metricsl <- lapply(seq_along(x@mean_metrics), mean_sd)
  names(metricsl) <- names(x@mean_metrics)
  listing <- repr_ls(
    metricsl,
    print_class = FALSE,
    print_df = TRUE,
    pad = pad + 2L,
    output_type = output_type
  )
  paste0(header, strrep(" ", pad), note, listing)
} # /rtemis::repr.MetricsRes
# %% print.MetricsRes ----
# Write the repr() string to the console and return `x` invisibly
method(print, MetricsRes) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL,
  ...
) {
  rendered <- repr(x, decimal_places, pad = pad, output_type = output_type)
  cat(rendered)
  invisible(x)
} # /rtemis::print.MetricsRes
# %% RegressionMetricsRes ----
#' @title RegressionMetricsRes
#'
#' @description
#' `MetricsRes` subclass for regression. The constructor row-binds the `metrics`
#' of every element of `res_metrics` and stores the column means and column
#' standard deviations as `mean_metrics` and `sd_metrics`.
#'
#' @author EDG
#' @noRd
RegressionMetricsRes <- new_class(
  name = "RegressionMetricsRes",
  parent = MetricsRes,
  constructor = function(sample, res_metrics) {
    # One row per resample, one column per metric
    stacked <- do.call(rbind, lapply(res_metrics, function(m) m@metrics))
    parent_obj <- MetricsRes(
      sample = sample,
      res_metrics = res_metrics,
      mean_metrics = vec2df(colMeans(stacked)),
      sd_metrics = vec2df(sapply(stacked, sd))
    )
    new_object(parent_obj)
  }
) # /rtemis::RegressionMetricsRes
#' @title ClassificationMetricsRes
#'
#' @description
#' `MetricsRes` subclass for classification. The constructor row-binds the
#' `Overall` metrics of every element of `res_metrics` and stores the column
#' means and column standard deviations as `mean_metrics` and `sd_metrics`.
#'
#' @author EDG
#' @noRd
ClassificationMetricsRes <- new_class(
  name = "ClassificationMetricsRes",
  parent = MetricsRes,
  constructor = function(sample, res_metrics) {
    # One row per resample, one column per overall metric
    stacked_overall <- do.call(
      rbind,
      lapply(res_metrics, function(m) m@metrics[["Overall"]])
    )
    parent_obj <- MetricsRes(
      sample = sample,
      res_metrics = res_metrics,
      mean_metrics = vec2df(colMeans(stacked_overall)),
      sd_metrics = vec2df(sapply(stacked_overall, sd))
    )
    new_object(parent_obj)
  }
) # /rtemis::ClassificationMetricsRes
# %% repr_CalibratedClassificationMetrics ----
#' repr for pre- vs. post-calibration classification metrics
#'
#' Builds a single string showing the confusion matrix, overall metrics and
#' either per-class metrics or the positive class, each as "pre => post".
#'
#' @param x `ClassificationMetrics` before calibration.
#' @param x_cal `ClassificationMetrics` after calibration.
#' @param decimal_places Integer: Decimal places passed to `paste_dfs`.
#' @param pad Integer: Left padding.
#' @param output_type Character or NULL: Resolved with `get_output_type()`.
#'
#' @return Character string.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
repr_CalibratedClassificationMetrics <- function(
  x,
  x_cal,
  decimal_places = 2L,
  pad = 2L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  title <- "Classification Metrics (Pre => Post Calibration)"
  if (!is.null(x@sample)) {
    title <- paste(x@sample, title)
  }
  header <- repr_S7name(title, pad = pad, output_type = output_type)

  # Confusion Matrix: Pre=>Post
  prepost_cm <- paste_tables(
    x@metrics[["Confusion_Matrix"]],
    x_cal@metrics[["Confusion_Matrix"]],
    sep = " => "
  )
  widest_name <- max(nchar(colnames(prepost_cm)), 9L)
  tblpad <- 17L - widest_name + pad
  cm_txt <- show_table(prepost_cm, pad = tblpad, output_type = output_type)

  # Overall metrics: Pre=>Post
  # Decimal formatting is done by paste_dfs, hence ddSci_dp = NULL below
  overall_df <- paste_dfs(
    x@metrics[["Overall"]],
    x_cal@metrics[["Overall"]],
    sep = " => ",
    decimal_places = decimal_places
  )
  overall_txt <- show_df(
    overall_df,
    pad = pad,
    transpose = TRUE,
    ddSci_dp = NULL,
    justify = "left",
    spacing = 2L,
    output_type = output_type
  )

  # Class metrics: Pre=>Post (for multiclass) or Positive Class (for binary)
  tail_txt <- if (is.na(x@metrics[["Positive_Class"]])) {
    # NOTE(review): unlike the Overall call, no `sep` is passed here, so
    # paste_dfs uses its default separator - confirm this is intended.
    class_df <- paste_dfs(
      x@metrics[["Class"]],
      x_cal@metrics[["Class"]],
      decimal_places = decimal_places
    )
    show_df(
      class_df,
      pad = pad,
      transpose = TRUE,
      ddSci_dp = NULL,
      justify = "left",
      spacing = 2,
      output_type = output_type
    )
  } else {
    paste0(
      "\n Positive Class ",
      fmt(
        x@metrics[["Positive_Class"]],
        col = highlight_col,
        bold = TRUE,
        output_type = output_type
      ),
      "\n"
    )
  }
  paste0(header, cm_txt, "\n", overall_txt, tail_txt)
} # /rtemis::repr_CalibratedClassificationMetrics
# %% repr_CalibratedClassificationResMetrics ----
#' repr for pre- vs. post-calibration resampled classification metrics
#'
#' Builds a single string listing every metric as
#' "mean (sd) => mean (sd)", pre- and post-calibration.
#'
#' @param x `ClassificationMetricsRes` before calibration.
#' @param x_cal `ClassificationMetricsRes` after calibration.
#' @param decimal_places Integer: Decimal places passed to `ddSci`.
#' @param pad Integer: Left padding.
#' @param output_type Character or NULL: Resolved with `get_output_type()`.
#'
#' @return Character string.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
repr_CalibratedClassificationResMetrics <- function(
  x,
  x_cal,
  decimal_places = 2L,
  pad = 2L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  header <- repr_S7name(
    paste(
      "Resampled Classification",
      x@sample,
      "Metrics (Pre => Post Calibration)"
    ),
    pad = pad,
    output_type = output_type
  )
  note <- italic(
    " Showing mean (sd) across resamples, Pre => Post calibration.\n",
    output_type = output_type
  )

  # Named list of "mean (sd)" strings, one per metric of `obj`
  mean_sd_strings <- function(obj) {
    strs <- lapply(seq_along(obj@mean_metrics), function(i) {
      sd_part <- paste0(" (", ddSci(obj@sd_metrics[[i]], decimal_places), ")")
      paste0(
        ddSci(obj@mean_metrics[[i]], decimal_places),
        gray(sd_part, output_type = output_type)
      )
    })
    names(strs) <- names(obj@mean_metrics)
    strs
  }
  pre_strings <- mean_sd_strings(x)
  post_strings <- mean_sd_strings(x_cal)

  # Combine pre=>post; entries are paired by position
  prepost_strings <- lapply(seq_along(pre_strings), function(i) {
    paste(pre_strings[[i]], post_strings[[i]], sep = " => ")
  })
  names(prepost_strings) <- names(pre_strings)

  listing <- repr_ls(
    prepost_strings,
    print_class = FALSE,
    print_df = TRUE,
    pad = pad + 2L,
    output_type = output_type
  )
  paste0(header, strrep(" ", pad), note, listing)
} # /rtemis::repr_CalibratedClassificationResMetrics
================================================
FILE: R/04_Preprocessor.R
================================================
# S7_Preprocessor.R
# ::rtemis::
# 2025- EDG rtemis.org
# References
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7
# %% PreprocessorConfig ----
#' @title PreprocessorConfig
#'
#' @description
#' S7 class that stores preprocessing settings. Objects are created by
#' `setup_Preprocessor()`, which documents the meaning of each setting; the
#' property types declared below are what S7 checks on construction.
#' Properties declared as `<type> | NULL` are optional.
#'
#' @author EDG
#' @noRd
PreprocessorConfig <- new_class(
name = "PreprocessorConfig",
properties = list(
complete_cases = class_logical,
remove_features_thres = class_numeric | NULL,
remove_cases_thres = class_numeric | NULL,
missingness = class_logical,
impute = class_logical,
impute_type = class_character,
impute_missRanger_params = class_list,
impute_discrete = class_character,
impute_continuous = class_character,
integer2factor = class_logical,
integer2numeric = class_logical,
logical2factor = class_logical,
logical2numeric = class_logical,
numeric2factor = class_logical,
numeric2factor_levels = class_character | NULL,
numeric_cut_n = class_numeric,
numeric_cut_labels = class_logical,
numeric_quant_n = class_numeric,
numeric_quant_NAonly = class_logical,
unique_len2factor = class_numeric,
character2factor = class_logical,
factorNA2missing = class_logical,
factorNA2missing_level = class_character,
factor2integer = class_logical,
factor2integer_startat0 = class_logical,
scale = class_logical,
center = class_logical,
scale_centers = class_numeric | NULL,
scale_coefficients = class_numeric | NULL,
remove_constants = class_logical,
remove_constants_skip_missing = class_logical,
remove_duplicates = class_logical,
remove_features = class_character | NULL,
one_hot = class_logical,
one_hot_levels = class_list | NULL,
add_date_features = class_logical,
date_features = class_character,
add_holidays = class_logical,
exclude = class_character | NULL
)
) # /PreprocessorConfig
# %% names.PreprocessorConfig ----
# names() returns the property names of a PreprocessorConfig
method(names, PreprocessorConfig) <- function(x) {
  all_props <- props(x)
  names(all_props)
}
# %% `$`.PreprocessorConfig ----
# `$` looks up `name` among the object's properties
method(`$`, PreprocessorConfig) <- function(x, name) {
  all_props <- props(x)
  all_props[[name]]
}
# %% `.DollarNames`.PreprocessorConfig ----
# `$` tab-completion: offer the property names that match `pattern`
method(`.DollarNames`, PreprocessorConfig) <- function(x, pattern = "") {
  grep(pattern, names(props(x)), value = TRUE)
}
# %% `[[`.PreprocessorConfig ----
# `[[` looks up `name` among the object's properties
method(`[[`, PreprocessorConfig) <- function(x, name) {
  all_props <- props(x)
  all_props[[name]]
}
# %% repr.PreprocessorConfig ----
# Build the printable string: class title followed by a listing of all
# properties. `limit` is passed through to repr_ls().
method(repr, PreprocessorConfig) <- function(
  x,
  limit = -1L,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  header <- repr_S7name(
    "PreprocessorConfig",
    pad = pad,
    output_type = output_type
  )
  listing <- repr_ls(
    props(x),
    pad = pad,
    limit = limit,
    output_type = output_type
  )
  paste0(header, listing)
} # /rtemis::repr.PreprocessorConfig
# %% print.PreprocessorConfig ----
# Write the repr() string to the console and return `x` invisibly
method(print, PreprocessorConfig) <- function(
  x,
  limit = -1L,
  output_type = NULL,
  ...
) {
  rendered <- repr(x, limit = limit, output_type = output_type)
  cat(rendered)
  invisible(x)
} # /rtemis::print.PreprocessorConfig
# %% setup_Preprocessor ----
#' Setup Preprocessor
#'
#' @description
#' Creates a `PreprocessorConfig` object, which can be used in [preprocess].
#'
#' @param complete_cases Logical: If TRUE, only retain complete cases (no missing data).
#' @param remove_cases_thres Float (0, 1): Remove cases in which the fraction
#' of missing features is greater than or equal to this value.
#' @param remove_features_thres Float (0, 1): Remove features in which the
#' fraction of cases with missing values is greater than or equal to this value.
#' @param missingness Logical: If TRUE, generate new boolean columns for each
#' feature with missing values, indicating which cases were missing data.
#' @param impute Logical: If TRUE, impute missing cases. See `impute_discrete` and
#' `impute_continuous`.
#' @param impute_type Character: Package to use for imputation.
#' @param impute_missRanger_params Named list with elements "pmm.k",
#' "maxiter", and "num.trees", which are passed to `missRanger::missRanger`. `pmm.k`
#' greater than 0 results in predictive mean matching. Defaults: `pmm.k = 3`,
#' `maxiter = 10`, `num.trees = 500`. Reduce `num.trees` for
#' faster imputation, especially in large datasets. Set `pmm.k = 0` to
#' disable predictive mean matching.
#' @param impute_discrete Character: Name of function that returns single value: How to impute
#' discrete variables for `impute_type = "meanMode"`.
#' @param impute_continuous Character: Name of function that returns single value: How to impute
#' continuous variables for `impute_type = "meanMode"`.
#' @param integer2factor Logical: If TRUE, convert all integers to factors. This includes
#' `bit64::integer64` columns.
#' @param integer2numeric Logical: If TRUE, convert all integers to numeric
#' (will only work if `integer2factor = FALSE`). This includes
#' `bit64::integer64` columns.
#' @param logical2factor Logical: If TRUE, convert all logical variables to
#' factors.
#' @param logical2numeric Logical: If TRUE, convert all logical variables to
#' numeric.
#' @param numeric2factor Logical: If TRUE, convert all numeric variables to
#' factors.
#' @param numeric2factor_levels Character vector: Optional - will be passed to
#' `levels` arg of `factor()` if `numeric2factor = TRUE`. For advanced/
#' specific use cases; need to know unique values of numeric vector(s) and given all
#' numeric vars have same unique values.
#' @param numeric_cut_n Integer: If > 0, convert all numeric variables to factors by
#' binning using `base::cut` with `breaks` equal to this number.
#' @param numeric_cut_labels Logical: The `labels` argument of [base::cut].
#' @param numeric_quant_n Integer: If > 0, convert all numeric variables to factors by
#' binning using `base::cut` with `breaks` equal to this number of quantiles,
#' produced using `stats::quantile`.
#' @param numeric_quant_NAonly Logical: If TRUE, only bin numeric variables with
#' missing values.
#' @param unique_len2factor Integer (>=2): Convert all variables with less
#' than or equal to this number of unique values to factors.
#' For example, if binary variables are encoded with 1, 2, you could use
#' `unique_len2factor = 2` to convert them to factors.
#' @param character2factor Logical: If TRUE, convert all character variables to
#' factors.
#' @param factorNA2missing Logical: If TRUE, make NA values in factors be of
#' level `factorNA2missing_level`. In many cases this is the preferred way
#' to handle missing data in categorical variables. Note that since this step
#' is performed before imputation, you can use this option to handle missing
#' data in categorical variables and impute numeric variables in the same
#' `preprocess` call.
#' @param factorNA2missing_level Character: Name of level if
#' `factorNA2missing = TRUE`.
#' @param factor2integer Logical: If TRUE, convert all factors to integers.
#' @param factor2integer_startat0 Logical: If TRUE, start integer coding at 0.
#' @param scale Logical: If TRUE, scale columns of `x`.
#' @param center Logical: If TRUE, center columns of `x`. Note that by
#' default it is the same as `scale`.
#' @param scale_centers Named vector: Centering values for each feature.
#' @param scale_coefficients Named vector: Scaling values for each feature.
#' @param remove_constants Logical: If TRUE, remove constant columns.
#' @param remove_constants_skip_missing Logical: If TRUE, skip missing values, before
#' checking if feature is constant.
#' @param remove_features Character vector: Features to remove.
#' @param remove_duplicates Logical: If TRUE, remove duplicate cases.
#' @param one_hot Logical: If TRUE, convert all factors using one-hot encoding.
#' @param one_hot_levels List: Named list of the form "feature_name" = "levels". Used when applying
#' one-hot encoding to validation or test data using `Preprocessor`.
#' @param add_date_features Logical: If TRUE, extract date features from date columns.
#' @param date_features Character vector: Features to extract from dates.
#' @param add_holidays Logical: If TRUE, extract holidays from date columns.
#' @param exclude Character vector: Exclude these columns from preprocessing.
#'
#' @section Order of Operations:
#'
#' * keep complete cases only
#' * remove constants
#' * remove duplicates
#' * remove cases by missingness threshold
#' * remove features by missingness threshold
#' * integer to factor
#' * integer to numeric
#' * logical to factor
#' * logical to numeric
#' * numeric to factor
#' * cut numeric to n bins
#' * cut numeric to n quantiles
#' * numeric with less than N unique values to factor
#' * character to factor
#' * factor NA to named level
#' * add missingness column
#' * impute
#' * scale and/or center
#' * one-hot encoding
#'
#' @return `PreprocessorConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' preproc_config <- setup_Preprocessor(factorNA2missing = TRUE)
#' preproc_config
# Collects the preprocessing settings and hands them, unchanged apart from
# `impute_type`, to the `PreprocessorConfig` constructor.
setup_Preprocessor <- function(
complete_cases = FALSE,
remove_features_thres = NULL,
remove_cases_thres = NULL,
missingness = FALSE,
impute = FALSE,
impute_type = c(
"missRanger",
"micePMM",
"meanMode"
),
impute_missRanger_params = list(
pmm.k = 3,
maxiter = 10,
num.trees = 500
),
impute_discrete = "get_mode",
impute_continuous = "mean",
integer2factor = FALSE,
integer2numeric = FALSE,
logical2factor = FALSE,
logical2numeric = FALSE,
numeric2factor = FALSE,
numeric2factor_levels = NULL,
numeric_cut_n = 0,
numeric_cut_labels = FALSE,
numeric_quant_n = 0,
numeric_quant_NAonly = FALSE,
unique_len2factor = 0,
character2factor = FALSE,
factorNA2missing = FALSE,
factorNA2missing_level = "missing",
# nonzeroFactors = FALSE,
factor2integer = FALSE,
factor2integer_startat0 = TRUE,
scale = FALSE,
# `center` defaults to whatever value `scale` has
center = scale,
scale_centers = NULL,
scale_coefficients = NULL,
remove_constants = FALSE,
remove_constants_skip_missing = TRUE,
remove_features = NULL,
remove_duplicates = FALSE,
one_hot = FALSE,
one_hot_levels = NULL,
# cleanfactorlevels = FALSE,
add_date_features = FALSE,
date_features = c("weekday", "month", "year"),
add_holidays = FALSE,
exclude = NULL
) {
# Match args
# Resolves `impute_type` to a single choice; the first option ("missRanger")
# is used when the argument is left at its default.
impute_type <- match.arg(impute_type)
# Checks performed in the `PreprocessorConfig` constructor
# (property types are declared on the class; no extra validation is done here)
PreprocessorConfig(
complete_cases = complete_cases,
remove_features_thres = remove_features_thres,
remove_cases_thres = remove_cases_thres,
missingness = missingness,
impute = impute,
impute_type = impute_type,
impute_missRanger_params = impute_missRanger_params,
impute_discrete = impute_discrete,
impute_continuous = impute_continuous,
integer2factor = integer2factor,
integer2numeric = integer2numeric,
logical2factor = logical2factor,
logical2numeric = logical2numeric,
numeric2factor = numeric2factor,
numeric2factor_levels = numeric2factor_levels,
numeric_cut_n = numeric_cut_n,
numeric_cut_labels = numeric_cut_labels,
numeric_quant_n = numeric_quant_n,
numeric_quant_NAonly = numeric_quant_NAonly,
unique_len2factor = unique_len2factor,
character2factor = character2factor,
factorNA2missing = factorNA2missing,
factorNA2missing_level = factorNA2missing_level,
factor2integer = factor2integer,
factor2integer_startat0 = factor2integer_startat0,
scale = scale,
center = center,
scale_centers = scale_centers,
scale_coefficients = scale_coefficients,
remove_constants = remove_constants,
remove_constants_skip_missing = remove_constants_skip_missing,
remove_features = remove_features,
remove_duplicates = remove_duplicates,
one_hot = one_hot,
one_hot_levels = one_hot_levels,
add_date_features = add_date_features,
date_features = date_features,
add_holidays = add_holidays,
exclude = exclude
)
} # /setup_Preprocessor
# Note:
# data_dependent_props <- c(
# "scale_centers", # Named vector with feature scaling centers.
# "scale_coefficients", # Named vector with feature scaling coefficients.
# "one_hot_levels", # Named list of the form "feature_name" = "levels".
# "remove_features" # Character vector of feature names to remove.
# )
# %% Preprocessor ----
#' @title Preprocessor
#'
#' @description
#' Holds the result of applying a `PreprocessorConfig` to a training dataset,
#' together with the data-dependent values needed to apply the same
#' preprocessing to validation and test datasets.
#'
#' @field config `PreprocessorConfig` object.
#' @field preprocessed Data frame or list: Preprocessed data. If a single data.frame is passed to
#' `preprocess`, this will be a data.frame. If additional data sets are passed to the
#' `dat_validation` and/or `dat_test` arguments, this will be a named list.
#' @field values List: Data-dependent preprocessing values (`scale_centers`,
#' `scale_coefficients`, `one_hot_levels`, `remove_features`) to be used for validation and
#' test set preprocessing.
#'
#' @author EDG
#' @noRd
Preprocessor <- new_class(
  name = "Preprocessor",
  properties = list(
    config = PreprocessorConfig,
    preprocessed = class_data.frame | class_list,
    values = class_list
  ),
  constructor = function(
    config,
    preprocessed,
    scale_centers = NULL,
    scale_coefficients = NULL,
    one_hot_levels = NULL,
    remove_features = NULL
  ) {
    # Bundle the four data-dependent values into the single `values` property
    data_dependent <- list(
      scale_centers = scale_centers,
      scale_coefficients = scale_coefficients,
      one_hot_levels = one_hot_levels,
      remove_features = remove_features
    )
    new_object(
      S7_object(),
      config = config,
      preprocessed = preprocessed,
      values = data_dependent
    )
  }
) # /Preprocessor
# %% repr.Preprocessor ----
# Build the printable string for a Preprocessor: class title followed by a
# listing of its properties.
#
# x: Preprocessor object.
# pad: Integer: Left padding.
# print_df: Logical: Passed to repr_ls().
# output_type: Character or NULL: Resolved with get_output_type().
#
# Returns a character string.
method(repr, Preprocessor) <- function(
  x,
  pad = 0L,
  print_df = FALSE,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name("Preprocessor", pad = pad, output_type = output_type),
    # Forward the resolved output_type so the property listing is rendered in
    # the same format as the title
    repr_ls(
      props(x),
      pad = pad,
      print_df = print_df,
      output_type = output_type
    )
  )
} # /rtemis::repr.Preprocessor
# %% print.Preprocessor ----
# Write the repr() string to the console and return `x` invisibly.
# `pad` is forwarded to repr() so the caller's padding is honored.
method(print, Preprocessor) <- function(x, pad = 0L, output_type = NULL, ...) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.Preprocessor
# %% names.Preprocessor ----
# names() returns the property names of a Preprocessor
method(names, Preprocessor) <- function(x) {
  all_props <- props(x)
  names(all_props)
}
# %% `$`.Preprocessor ----
# `$` looks up `name` among the object's properties
method(`$`, Preprocessor) <- function(x, name) {
  all_props <- props(x)
  all_props[[name]]
}
# %% `.DollarNames`.Preprocessor ----
# `$` tab-completion: offer the property names that match `pattern`
method(`.DollarNames`, Preprocessor) <- function(x, pattern = "") {
  grep(pattern, names(props(x)), value = TRUE)
}
# %% `[`.Preprocessor ----
# `[` returns the single property called `name` (same result as `[[`)
method(`[`, Preprocessor) <- function(x, name) {
  all_props <- props(x)
  all_props[[name]]
}
# %% `[[`.Preprocessor ----
# `[[` looks up `name` among the object's properties
method(`[[`, Preprocessor) <- function(x, name) {
  all_props <- props(x)
  all_props[[name]]
}
# %% preprocessed.Preprocessor ----
# Accessor for the `preprocessed` property
method(preprocessed, Preprocessor) <- function(x) {
  prop(x, "preprocessed")
}
================================================
FILE: R/05_Resampler.R
================================================
# S7_Resampler.R
# ::rtemis::
# 2025- EDG rtemis.org
# References
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7
# Description
# `ResamplerConfig` class and subclasses create objects that store resampling configuration.
# They are set by `setup_Resampler()` and perform type checking and validation.
# They are used by `resample()`.
# `Resampler` class stores resamples and their configuration.
# `Resampler` objects are created by `resample()`.
# Note: `id_strat` is used by `resample()`, not individual resamplers
# %% ResamplerConfig ----
#' @title ResamplerConfig
#'
#' @description
#' Superclass for resampler configuration. The constructor passes `n` through
#' `clean_posint()` before storing it.
#'
#' @field type Character: Type of resampler.
#' @field n Integer: Number of resamples.
#'
#' @author EDG
#' @noRd
ResamplerConfig <- new_class(
  name = "ResamplerConfig",
  properties = list(
    type = class_character,
    n = class_integer # scalar_int_pos
  ),
  constructor = function(type, n) {
    # NA is allowed because LOOCV has no predefined number of resamples
    n_clean <- clean_posint(n, allow_na = TRUE)
    new_object(S7_object(), type = type, n = n_clean)
  }
) # /rtemis::ResamplerConfig
# %% `$`.ResamplerConfig ----
# `$` returns the S7 property called `name`
method(`$`, ResamplerConfig) <- function(x, name) {
  prop(object = x, name = name)
}
# %% `[[`.ResamplerConfig ----
# `[[` returns the S7 property called `name`
method(`[[`, ResamplerConfig) <- function(x, name) {
  prop(object = x, name = name)
}
# %% repr.ResamplerConfig ----
#' repr ResamplerConfig
#'
#' Builds the printable string: class title followed by a listing of all
#' properties except the first one.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(repr, ResamplerConfig) <- function(x, pad = 0L, output_type = NULL) {
  output_type <- get_output_type(output_type)
  header <- repr_S7name(x, pad = pad, output_type = output_type)
  # The first property is left out of the listing
  shown_props <- props(x)[-1]
  listing <- repr_ls(
    shown_props,
    pad = pad,
    print_class = FALSE,
    output_type = output_type
  )
  paste0(header, listing)
} # /rtemis::repr.ResamplerConfig
# %% print.ResamplerConfig ----
#' Print ResamplerConfig
#'
#' @description
#' Writes the `repr()` string of a ResamplerConfig object to the console.
#'
#' @param x ResamplerConfig object
#'
#' @return `x`, invisibly.
#'
#' @author EDG
#' @noRd
method(print, ResamplerConfig) <- function(
  x,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  rendered <- repr(x, pad = pad, output_type = output_type)
  cat(rendered)
  invisible(x)
} # /rtemis::print.ResamplerConfig
# %% desc.ResamplerConfig ----
# One-line description of a resampler: the number of resamples followed by a
# type-specific label; unrecognized types fall back to " resamples".
method(desc, ResamplerConfig) <- function(x) {
  label <- switch(
    x@type,
    KFold = " independent folds",
    StratSub = " stratified subsamples",
    StratBoot = " stratified bootstraps",
    Bootstrap = " bootstrap resamples",
    Custom = " custom resamples",
    LOOCV = " leave-one-out folds",
    " resamples"
  )
  paste0(x@n, label)
} # /rtemis::desc.ResamplerConfig
# %% KFoldConfig ----
#' @title KFoldConfig
#'
#' @description
#' ResamplerConfig subclass for k-fold resampling. The constructor sets
#' `type` to "KFold".
#'
#' @author EDG
#' @noRd
KFoldConfig <- new_class(
  name = "KFoldConfig",
  parent = ResamplerConfig,
  properties = list(
    stratify_var = class_character | NULL,
    strat_n_bins = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(n, stratify_var, strat_n_bins, id_strat, seed) {
    base_config <- ResamplerConfig(type = "KFold", n = n)
    new_object(
      base_config,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::KFoldConfig
# %% StratSubConfig ----
#' @title StratSubConfig
#'
#' @description
#' ResamplerConfig subclass for stratified subsampling. The constructor sets
#' `type` to "StratSub".
#'
#' @author EDG
#' @noRd
StratSubConfig <- new_class(
  name = "StratSubConfig",
  parent = ResamplerConfig,
  properties = list(
    n = scalar_int_pos,
    train_p = scalar_dbl_01excl,
    stratify_var = class_character | NULL,
    strat_n_bins = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(
    n,
    train_p,
    stratify_var,
    strat_n_bins,
    id_strat,
    seed
  ) {
    base_config <- ResamplerConfig(type = "StratSub", n = n)
    new_object(
      base_config,
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::StratSubConfig
# %% StratBootConfig ----
#' @title StratBootConfig
#'
#' @description
#' ResamplerConfig subclass for stratified bootstrapping. The constructor sets
#' `type` to "StratBoot".
#'
#' @author EDG
#' @noRd
StratBootConfig <- new_class(
  name = "StratBootConfig",
  parent = ResamplerConfig,
  properties = list(
    stratify_var = class_character | NULL,
    train_p = scalar_dbl_01excl,
    strat_n_bins = scalar_int_pos,
    target_length = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(
    n,
    stratify_var,
    train_p,
    strat_n_bins,
    target_length,
    id_strat,
    seed
  ) {
    base_config <- ResamplerConfig(type = "StratBoot", n = n)
    new_object(
      base_config,
      stratify_var = stratify_var,
      train_p = train_p,
      strat_n_bins = strat_n_bins,
      target_length = target_length,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::StratBootConfig
# %% BootstrapConfig ----
#' @title BootstrapConfig
#'
#' @description
#' ResamplerConfig subclass for bootstrap resampling. The constructor sets
#' `type` to "Bootstrap".
#'
#' @author EDG
#' @noRd
BootstrapConfig <- new_class(
  name = "BootstrapConfig",
  parent = ResamplerConfig,
  properties = list(
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(n, id_strat, seed) {
    base_config <- ResamplerConfig(type = "Bootstrap", n = n)
    new_object(base_config, id_strat = id_strat, seed = seed)
  }
) # /rtemis::BootstrapConfig
# %% LOOCVConfig ----
#' @title LOOCVConfig
#'
#' @description
#' ResamplerConfig subclass for leave-one-out cross-validation. The constructor
#' sets `type` to "LOOCV" and adds no properties of its own.
#'
#' @author EDG
#' @noRd
LOOCVConfig <- new_class(
  name = "LOOCVConfig",
  parent = ResamplerConfig,
  constructor = function(n) {
    base_config <- ResamplerConfig(type = "LOOCV", n = n)
    new_object(base_config)
  }
) # /rtemis::LOOCVConfig
# %% CustomConfig ----
#' @title CustomConfig
#'
#' @description
#' ResamplerConfig subclass for custom resampling. The constructor sets
#' `type` to "Custom" and adds no properties of its own.
#'
#' @author EDG
#' @noRd
CustomConfig <- new_class(
  name = "CustomConfig",
  parent = ResamplerConfig,
  constructor = function(n) {
    base_config <- ResamplerConfig(type = "Custom", n = n)
    new_object(base_config)
  }
) # /rtemis::CustomConfig
# %% setup_Resampler ----
#' Setup Resampler
#'
#' @description
#' Creates the `ResamplerConfig` subclass object that corresponds to `type`.
#'
#' @param n_resamples Integer: Number of resamples to make. Not used when
#' `type = "LOOCV"`.
#' @param type Character: Type of resampler: "KFold", "StratSub", "StratBoot", "Bootstrap",
#' "LOOCV".
#' @param stratify_var Character: Variable to stratify by. Used by "KFold", "StratSub" and
#' "StratBoot".
#' @param train_p Float: Training set proportion, e.g. 0.75. Used by "StratSub" and
#' "StratBoot".
#' @param strat_n_bins Integer: Number of bins to stratify by. Used by "KFold", "StratSub"
#' and "StratBoot".
#' @param target_length Integer: Target length for stratified bootstraps. Used by "StratBoot".
#' @param id_strat Integer: Vector of indices to stratify by. These may be, for example, case IDs
#' if your dataset contains repeated measurements. By specifying this vector, you can ensure that
#' each case can only be present in the training or test set, but not both.
#' @param seed Integer: Random seed.
#' @param verbosity Integer: Verbosity level. Currently not used by this function.
#'
#' @return ResamplerConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' tenfold_resampler <- setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L)
#' tenfold_resampler
setup_Resampler <- function(
  n_resamples = 10L,
  type = c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"),
  # index = NULL,
  # group = NULL,
  stratify_var = NULL,
  train_p = .75,
  strat_n_bins = 4L,
  target_length = NULL,
  id_strat = NULL,
  seed = NULL,
  verbosity = 1L
) {
  # Arguments
  type <- match_arg(
    type,
    c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV")
  )
  if (length(type) == 0) {
    cli::cli_abort(
      "Invalid resampler type. Must be one of: 'StratSub', 'StratBoot', 'KFold', 'Bootstrap', 'LOOCV'"
    )
  }
  seed <- clean_int(seed)

  # Dispatch to the config class that matches `type`
  if (type == "KFold") {
    KFoldConfig(
      n = n_resamples,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "StratSub") {
    StratSubConfig(
      n = n_resamples,
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "StratBoot") {
    StratBootConfig(
      n = n_resamples,
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      target_length = target_length,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "Bootstrap") {
    BootstrapConfig(
      n = n_resamples,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "LOOCV") {
    # The number of leave-one-out folds is not set here, so n is left as NA
    LOOCVConfig(
      n = NA_integer_
    )
  } else {
    # Defensive fallback in case match_arg() lets an unsupported value through
    cli::cli_abort(c(
      "Resampler {.val {type}} is not supported.",
      "i" = "Supported types are: 'KFold', 'StratSub', 'StratBoot', 'Bootstrap', 'LOOCV'."
    ))
  }
} # /rtemis::setup_Resampler
# %% Resampler ----
#' @title Resampler
#'
#' @description
#' S7 class for resampling objects: it bundles a list of resamples with the
#' `ResamplerConfig` stored alongside them.
#'
#' @field type Character: Resampler type.
#' @field resamples List: The resamples. Elements are exposed through the
#' `$`, `[[` and `names()` methods defined for this class.
#' @field config `ResamplerConfig` object. `desc()` on a `Resampler` delegates to it.
#'
#' @author EDG
#' @noRd
Resampler <- new_class(
name = "Resampler",
properties = list(
type = class_character,
resamples = class_list,
config = ResamplerConfig
)
) # /rtemis::Resampler
# %% repr.Resampler ----
#' repr Resampler
#'
#' Build the text representation of a `Resampler`: the S7 class name header
#' followed by a listing of all of the object's properties.
#'
#' @param x `Resampler` object.
#' @param pad Integer: Padding passed on to `repr_S7name()` and `repr_ls()`.
#' @param output_type Character or NULL: Resolved with `get_output_type()`.
#'
#' @return Character string.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(repr, Resampler) <- function(x, pad = 0L, output_type = NULL) {
  output_type <- get_output_type(output_type)
  header <- repr_S7name(x, pad = pad, output_type = output_type)
  listing <- repr_ls(
    props(x),
    pad = pad,
    print_class = FALSE,
    output_type = output_type
  )
  paste0(header, listing)
} # /rtemis::repr.Resampler
# %% print.Resampler ----
# Print a Resampler by writing its `repr()` text to the console.
# Returns `x` invisibly.
method(print, Resampler) <- function(
  x,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  txt <- repr(x, output_type = output_type)
  cat(txt)
  invisible(x)
}
# %% names.Resampler ----
# names() of a Resampler are the names of its `resamples` list.
method(names, Resampler) <- function(x) {
  resamples <- x@resamples
  names(resamples)
}
# %% `$`.Resampler ----
# Access elements of Resampler@resamples using `$` ----
# `x$name` returns the element called `name` from the `resamples` list.
method(`$`, Resampler) <- function(x, name) {
  resamples <- x@resamples
  resamples[[name]]
}
# %% `.DollarNames`.Resampler ----
# DollarSign tab-complete Resampler@resamples names
# Completion candidates for `x$`: names of the `resamples` list that match
# `pattern` (used as a regular expression by grep()).
method(`.DollarNames`, Resampler) <- function(x, pattern = "") {
  grep(pattern, names(x@resamples), value = TRUE)
}
# %% `[[`.Resampler ----
# Access elements of Resampler@resamples using `[[` ----
# `x[[index]]` returns the corresponding element of the `resamples` list.
method(`[[`, Resampler) <- function(x, index) {
  resamples <- x@resamples
  resamples[[index]]
}
# %% desc.Resampler ----
# Describe a Resampler by delegating to `desc()` of its config.
method(desc, Resampler) <- function(x) {
  config <- x@config
  desc(config)
}
# %% --- Internal functions ----
# %% .list_to_ResamplerConfig ----
#' Convert a list to a ResamplerConfig object
#'
#' Internal function used by `rtemis.server` and `SuperConfig` deserialization
#' to reconstruct a `ResamplerConfig` object from a named list. Not intended
#' for direct use by end users.
#'
#' Only the elements relevant to the given `type` are read; the values are
#' passed to the corresponding configuration class as-is.
#'
#' @param x Named list with the following elements:
#'   \describe{
#'     \item{`type`}{Character: resampler type — one of `"KFold"`,
#'       `"StratSub"`, `"StratBoot"`, `"Bootstrap"`, `"LOOCV"`, `"Custom"`.}
#'     \item{`n`}{Integer: number of resamples (not used for `"LOOCV"`).}
#'     \item{`train_p`}{Numeric: training proportion (used by `"StratSub"` and
#'       `"StratBoot"`).}
#'     \item{`stratify_var`}{Character or `NULL`: stratification variable name.}
#'     \item{`strat_n_bins`}{Integer: number of bins for stratification.}
#'     \item{`target_length`}{Integer or `NULL`: target resample length
#'       (`"StratBoot"` only).}
#'     \item{`id_strat`}{Vector or `NULL`: IDs to stratify by, as described for
#'       the `id_strat` argument of `setup_Resampler()`.}
#'     \item{`seed`}{Integer or `NULL`: random seed.}
#'   }
#'
#' @return A `ResamplerConfig` object of the appropriate subtype. An error is
#' raised if `type` is missing, is not a single character string, or is not
#' one of the supported types.
#'
#' @author EDG
#' @keywords internal
#' @export
.list_to_ResamplerConfig <- function(x) {
  type <- x[["type"]]
  # Fail with a clear message instead of switch()'s
  # "EXPR must be a length 1 vector" when `type` is absent or malformed.
  if (!is.character(type) || length(type) != 1L || is.na(type)) {
    cli::cli_abort(
      "{.arg x} must contain a {.field type} element that is a single character string."
    )
  }
  switch(
    type,
    KFold = KFoldConfig(
      n = x[["n"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    StratSub = StratSubConfig(
      n = x[["n"]],
      train_p = x[["train_p"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    StratBoot = StratBootConfig(
      n = x[["n"]],
      train_p = x[["train_p"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      target_length = x[["target_length"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    Bootstrap = BootstrapConfig(
      n = x[["n"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    # LOOCV: any `n` in the list is ignored; `n` is set to NA.
    LOOCV = LOOCVConfig(
      n = NA_integer_
    ),
    Custom = CustomConfig(
      n = x[["n"]]
    ),
    # Default arm: without it, an unrecognized type would silently return
    # invisible NULL instead of a ResamplerConfig.
    cli::cli_abort(c(
      "Resampler type {.val {type}} is not supported.",
      "i" = "Supported types are: 'KFold', 'StratSub', 'StratBoot', 'Bootstrap', 'LOOCV', 'Custom'."
    ))
  )
} # /rtemis::.list_to_ResamplerConfig
================================================
FILE: R/06_Tuner.R
================================================
# S7_Tuner.R
# ::rtemis::
# 2025- EDG rtemis.org
# References
# S7
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7
# future
# https://www.futureverse.org/backends.html
# Description
# `TunerConfig` class and subclasses create objects that store tuner config.
# They are set by `setup_GridSearch()` and perform type checking and validation.
# They are used by `tune()`.
# `Tuner` class and subclasses create objects that store tuning results.
# They are created by `tune()`.
# Dev
# Should both class constructors (e.g. GridSearch@constructor) and setup functions
# (e.g. setup_GridSearch) perform type checking and validation?
# %% TunerConfig ----
#' TunerConfig
#'
#' Superclass for tuner config. The `config` list is what the `repr()`, `$`
#' and `.DollarNames` methods for this class read from.
#'
#' @field type Character: Type of tuner, e.g. "GridSearch".
#' @field config Named list of tuner config.
#'
#' @author EDG
#' @noRd
TunerConfig <- new_class(
name = "TunerConfig",
properties = list(
type = class_character,
config = class_list
)
) # /rtemis::TunerConfig
# %% repr.TunerConfig ----
# Build the text representation of a TunerConfig: a header of the form
# "<type> TunerConfig" followed by a listing of the `config` list.
# `output_type` is resolved with get_output_type() before use.
method(repr, TunerConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  title <- paste(x@type, "TunerConfig")
  header <- repr_S7name(title, pad = pad, output_type = output_type)
  listing <- repr_ls(x@config, pad = pad, output_type = output_type)
  paste0(header, listing)
} # /rtemis::repr.TunerConfig
# %% print.TunerConfig ----
# Print a TunerConfig: write its `repr()` text followed by a newline.
# Returns `x` invisibly.
method(print, TunerConfig) <- function(x, pad = 0L, ...) {
  txt <- repr(x, pad = pad)
  cat(txt, "\n")
  invisible(x)
}
# %% desc.TunerConfig ----
# Short description of a TunerConfig. For type "GridSearch" this is
# "<search_type> grid search"; for any other type nothing is returned
# (the `if` has no `else`, so the result is an invisible NULL).
method(desc, TunerConfig) <- function(x) {
  if (x@type == "GridSearch") {
    search_type <- x@config[["search_type"]]
    paste(search_type, "grid search")
  }
}
# %% `$`.TunerConfig ----
# Make TunerConfig@config `$`-accessible
# `x$name` returns the element called `name` from the `config` list.
method(`$`, TunerConfig) <- function(x, name) {
  config <- x@config
  config[[name]]
}
# %% `.DollarNames`.TunerConfig ----
# `$`-autocomplete TunerConfig@config
# Completion candidates for `x$`: names of the `config` list that match
# `pattern` (used as a regular expression by grep()).
method(`.DollarNames`, TunerConfig) <- function(x, pattern = "") {
  grep(pattern, names(x@config), value = TRUE)
}
# %% `[[`.
gitextract_jpidflln/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ ├── CONTRIBUTING.md
│ └── workflows/
│ └── R-CMD-check.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R/
│ ├── 00_S7init.R
│ ├── 01_ExecutionConfig.R
│ ├── 02_Hyperparameters.R
│ ├── 03_Metrics.R
│ ├── 04_Preprocessor.R
│ ├── 05_Resampler.R
│ ├── 06_Tuner.R
│ ├── 07_Supervised.R
│ ├── 08_MassUni.R
│ ├── 09_ClusteringConfig.R
│ ├── 10_Clustering.R
│ ├── 11_DecompositionConfig.R
│ ├── 12_Decomposition.R
│ ├── 13_Themes.R
│ ├── 14_SuperConfig.R
│ ├── 15_CheckData.R
│ ├── 16_S7utils.R
│ ├── algorithmDB.R
│ ├── calibrate.R
│ ├── check_data.R
│ ├── check_input_data.R
│ ├── cluster.R
│ ├── cluster_CMeans.R
│ ├── cluster_DBSCAN.R
│ ├── cluster_flexclust.R
│ ├── data_xt_example.R
│ ├── ddSci.R
│ ├── ddb.R
│ ├── decomp.R
│ ├── decomp_ICA.R
│ ├── decomp_Isomap.R
│ ├── decomp_NMF.R
│ ├── decomp_PCA.R
│ ├── decomp_UMAP.R
│ ├── decomp_tSNE.R
│ ├── draw_3Dscatter.R
│ ├── draw_bar.R
│ ├── draw_box.R
│ ├── draw_calibration.R
│ ├── draw_confusion.R
│ ├── draw_dist.R
│ ├── draw_graphd3.R
│ ├── draw_graphjs.R
│ ├── draw_heatmap.R
│ ├── draw_leaflet.R
│ ├── draw_pie.R
│ ├── draw_protein.R
│ ├── draw_pvals.R
│ ├── draw_roc.R
│ ├── draw_scatter.R
│ ├── draw_spectrogram.R
│ ├── draw_survfit.R
│ ├── draw_table.R
│ ├── draw_ts.R
│ ├── draw_varimp.R
│ ├── draw_volcano.R
│ ├── draw_xt.R
│ ├── fmt.R
│ ├── ifw.R
│ ├── massGLM.R
│ ├── metrics.R
│ ├── msg.R
│ ├── preprocess.R
│ ├── present.R
│ ├── read.R
│ ├── resample.R
│ ├── rtemis-package.R
│ ├── rtemis_color_system.R
│ ├── theme.R
│ ├── train.R
│ ├── train_CART.R
│ ├── train_GAM.R
│ ├── train_GLM.R
│ ├── train_GLMNET.R
│ ├── train_Isotonic.R
│ ├── train_LightCART.R
│ ├── train_LightGBM.R
│ ├── train_LightRF.R
│ ├── train_LightRuleFit.R
│ ├── train_Ranger.R
│ ├── train_SVM.R
│ ├── train_TabNet.R
│ ├── tune.R
│ ├── tune_GridSearch.R
│ ├── utils.R
│ ├── utils_art.R
│ ├── utils_async.R
│ ├── utils_checks.R
│ ├── utils_color.R
│ ├── utils_data.R
│ ├── utils_data.table.R
│ ├── utils_date.R
│ ├── utils_df.R
│ ├── utils_exec.R
│ ├── utils_files.R
│ ├── utils_html.R
│ ├── utils_io.R
│ ├── utils_lightgbm.R
│ ├── utils_palettes.R
│ ├── utils_plot.R
│ ├── utils_plotly.R
│ ├── utils_print.R
│ ├── utils_rt.R
│ ├── utils_rules.R
│ ├── utils_strings.R
│ ├── utils_supervised.R
│ ├── utils_uniprot.R
│ ├── utils_xt.R
│ └── zzz.R
├── README.md
├── data/
│ └── xt_example.rda
├── data-raw/
│ └── create_xt_example.R
├── inst/
│ ├── CITATION
│ ├── extdata/
│ │ ├── us-counties.rds
│ │ └── us-states.rds
│ └── resources/
│ ├── aminoacids.rds
│ ├── rtemis.utf8
│ └── rtemis2.utf8
├── man/
│ ├── available_algorithms.Rd
│ ├── available_draw.Rd
│ ├── available_themes.Rd
│ ├── calibrate.Rd
│ ├── check_data.Rd
│ ├── choose_theme.Rd
│ ├── class_imbalance.Rd
│ ├── classification_metrics.Rd
│ ├── clean_colnames.Rd
│ ├── clean_names.Rd
│ ├── cluster.Rd
│ ├── col2grayscale.Rd
│ ├── color_adjust.Rd
│ ├── ddSci.Rd
│ ├── ddb_collect.Rd
│ ├── ddb_data.Rd
│ ├── decomp.Rd
│ ├── describe.Rd
│ ├── df_movecolumn.Rd
│ ├── df_nunique_perfeat.Rd
│ ├── dot-list_to_Hyperparameters.Rd
│ ├── dot-list_to_ResamplerConfig.Rd
│ ├── dot-list_to_TunerConfig.Rd
│ ├── draw_3Dscatter.Rd
│ ├── draw_bar.Rd
│ ├── draw_box.Rd
│ ├── draw_calibration.Rd
│ ├── draw_confusion.Rd
│ ├── draw_dist.Rd
│ ├── draw_fit.Rd
│ ├── draw_graphD3.Rd
│ ├── draw_graphjs.Rd
│ ├── draw_heatmap.Rd
│ ├── draw_leaflet.Rd
│ ├── draw_pie.Rd
│ ├── draw_protein.Rd
│ ├── draw_pvals.Rd
│ ├── draw_roc.Rd
│ ├── draw_scatter.Rd
│ ├── draw_spectrogram.Rd
│ ├── draw_survfit.Rd
│ ├── draw_table.Rd
│ ├── draw_ts.Rd
│ ├── draw_varimp.Rd
│ ├── draw_volcano.Rd
│ ├── draw_xt.Rd
│ ├── dt_describe.Rd
│ ├── dt_inspect_types.Rd
│ ├── dt_keybin_reshape.Rd
│ ├── dt_merge.Rd
│ ├── dt_names_by_attr.Rd
│ ├── dt_nunique_perfeat.Rd
│ ├── dt_pctmatch.Rd
│ ├── dt_pctmissing.Rd
│ ├── dt_set_autotypes.Rd
│ ├── dt_set_clean_all.Rd
│ ├── dt_set_cleanfactorlevels.Rd
│ ├── dt_set_logical2factor.Rd
│ ├── dt_set_one_hot.Rd
│ ├── exc.Rd
│ ├── feature_matrix.Rd
│ ├── feature_names.Rd
│ ├── features.Rd
│ ├── get_factor_names.Rd
│ ├── get_mode.Rd
│ ├── get_msg_sink.Rd
│ ├── get_palette.Rd
│ ├── getnames.Rd
│ ├── getnamesandtypes.Rd
│ ├── grapes-BC-grapes.Rd
│ ├── inc.Rd
│ ├── index_col_by_attr.Rd
│ ├── init_project_dir.Rd
│ ├── inspect.Rd
│ ├── inspect_type.Rd
│ ├── is_constant.Rd
│ ├── labelify.Rd
│ ├── massGLM.Rd
│ ├── matchcases.Rd
│ ├── mgetnames.Rd
│ ├── names_by_class.Rd
│ ├── one_hot2factor.Rd
│ ├── outcome.Rd
│ ├── outcome_name.Rd
│ ├── plot.MassGLM.Rd
│ ├── plot_manhattan.Rd
│ ├── plot_roc.Rd
│ ├── plot_true_pred.Rd
│ ├── plot_varimp.Rd
│ ├── preprocess.Rd
│ ├── preprocessed.Rd
│ ├── present.Rd
│ ├── previewcolor.Rd
│ ├── read.Rd
│ ├── read_config.Rd
│ ├── regression_metrics.Rd
│ ├── resample.Rd
│ ├── rnormmat.Rd
│ ├── rtemis-package.Rd
│ ├── rtemis_colors.Rd
│ ├── rtversion.Rd
│ ├── runifmat.Rd
│ ├── set_msg_sink.Rd
│ ├── set_outcome.Rd
│ ├── setdiffsym.Rd
│ ├── setup_CART.Rd
│ ├── setup_CMeans.Rd
│ ├── setup_DBSCAN.Rd
│ ├── setup_ExecutionConfig.Rd
│ ├── setup_GAM.Rd
│ ├── setup_GLM.Rd
│ ├── setup_GLMNET.Rd
│ ├── setup_GridSearch.Rd
│ ├── setup_HardCL.Rd
│ ├── setup_ICA.Rd
│ ├── setup_Isomap.Rd
│ ├── setup_Isotonic.Rd
│ ├── setup_KMeans.Rd
│ ├── setup_LightCART.Rd
│ ├── setup_LightGBM.Rd
│ ├── setup_LightRF.Rd
│ ├── setup_LightRuleFit.Rd
│ ├── setup_LinearSVM.Rd
│ ├── setup_NMF.Rd
│ ├── setup_NeuralGas.Rd
│ ├── setup_PCA.Rd
│ ├── setup_Preprocessor.Rd
│ ├── setup_RadialSVM.Rd
│ ├── setup_Ranger.Rd
│ ├── setup_Resampler.Rd
│ ├── setup_SuperConfig.Rd
│ ├── setup_SuperConfigLive.Rd
│ ├── setup_TabNet.Rd
│ ├── setup_UMAP.Rd
│ ├── setup_tSNE.Rd
│ ├── size.Rd
│ ├── table_column_attr.Rd
│ ├── theme.Rd
│ ├── to_json.Rd
│ ├── train.Rd
│ ├── uniprot_get.Rd
│ ├── with_msg_sink.Rd
│ ├── write_toml.Rd
│ ├── xt_example.Rd
│ └── xtdescribe.Rd
└── tests/
├── testthat/
│ ├── test_Calibration.R
│ ├── test_CheckData.R
│ ├── test_Clustering.R
│ ├── test_Decomposition.R
│ ├── test_ExecutionConfig.R
│ ├── test_Hyperparameters.R
│ ├── test_Metrics.R
│ ├── test_Preprocessor.R
│ ├── test_Resampler.R
│ ├── test_SuperConfig.R
│ ├── test_SuperConfigLive.R
│ ├── test_Supervised.R
│ ├── test_Theme.R
│ ├── test_Tuner.R
│ ├── test_checks.R
│ ├── test_colorsystem.R
│ ├── test_draw.R
│ ├── test_idx.R
│ ├── test_massGLM.R
│ ├── test_msg_sink.R
│ ├── test_strings.R
│ └── test_to_json.R
└── testthat.R
Condensed preview — 296 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,562K chars).
[
{
"path": ".Rbuildignore",
"chars": 319,
"preview": "^__dev$\n^__validation$\n^_pkgdown\\.yml$\n^.*\\.code-workspace$\n^.*\\.Rcheck$\n^.*\\.tar.gz$\n^[.]?air[.]toml$\n^\\.claude$\n^\\.DS_"
},
{
"path": ".github/.gitignore",
"chars": 8,
"preview": "*.html\n\n"
},
{
"path": ".github/CONTRIBUTING.md",
"chars": 5307,
"preview": "# Contributing to rtemis\n\nThank you for your interest in contributing to **rtemis**! This guide will help you report iss"
},
{
"path": ".github/workflows/R-CMD-check.yaml",
"chars": 1059,
"preview": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at"
},
{
"path": ".gitignore",
"chars": 495,
"preview": "# Dev\n__dev/\n__validation/\ndev/\n__out/\nspecs/\n\n# Mac OS\n.DS_Store\n\n# VS Code\n.vscode\n*.code-workspace\n\n# R History files"
},
{
"path": "DESCRIPTION",
"chars": 2395,
"preview": "Package: rtemis\nVersion: 1.2.0\nTitle: Machine Learning and Visualization\nDate: 2026-05-12\nAuthors@R: person(given = \"E.D"
},
{
"path": "LICENSE.md",
"chars": 35149,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "Makefile",
"chars": 2605,
"preview": "PKG := $(shell awk '/^Package:/{print $$2; exit}' DESCRIPTION)\nR ?= R\nRSCRIPT ?= Rscript\nCHECK_DIR := $(PKG).Rcheck\nTARB"
},
{
"path": "NAMESPACE",
"chars": 3876,
"preview": "# Generated by roxygen2: do not edit by hand\n\nS3method(plot,MassGLM)\nexport(\"%BC%\")\nexport(.list_to_Hyperparameters)\nexp"
},
{
"path": "NEWS.md",
"chars": 452,
"preview": "# rtemis news\n\n## 1.0.0 First CRAN release\n\n## 1.0.1\n\n- Introduce `VariableImportance` S7 class to represent variable im"
},
{
"path": "R/00_S7init.R",
"chars": 32455,
"preview": "# S7_init.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References\n# S7 generics: https://rconsortium.github.io/S7/articles/g"
},
{
"path": "R/01_ExecutionConfig.R",
"chars": 3891,
"preview": "# ExecutionConfig.R\n# ::rtemis::\n# 2026- EDG rtemis.org\n\n# %% ExecutionConfig ----\n#' ExecutionConfig Class\n#'\n#' @descr"
},
{
"path": "R/02_Hyperparameters.R",
"chars": 67408,
"preview": "# S7_Hyperparameters.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References ----\n# S7\n# - https://github.com/RConsortium/S7"
},
{
"path": "R/03_Metrics.R",
"chars": 12273,
"preview": "# S7_Metrics.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% Metrics ----\n#' @title Metrics\n#'\n#' @description\n#' Superclass"
},
{
"path": "R/04_Preprocessor.R",
"chars": 15071,
"preview": "# S7_Preprocessor.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References\n# https://github.com/RConsortium/S7/\n# https://rco"
},
{
"path": "R/05_Resampler.R",
"chars": 13128,
"preview": "# S7_Resampler.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References\n# https://github.com/RConsortium/S7/\n# https://rconso"
},
{
"path": "R/06_Tuner.R",
"chars": 10256,
"preview": "# S7_Tuner.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References\n# S7\n# https://github.com/RConsortium/S7/\n# https://rcons"
},
{
"path": "R/07_Supervised.R",
"chars": 69545,
"preview": "# S7_Supervised.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References\n# https://github.com/RConsortium/S7/\n# https://rcons"
},
{
"path": "R/08_MassUni.R",
"chars": 6907,
"preview": "# S7_MassUni.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% MassGLM ----\n#' @title MassGLM\n#'\n#' @description\n#' Superclass "
},
{
"path": "R/09_ClusteringConfig.R",
"chars": 10045,
"preview": "# S7_ClusteringConfig.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% ClusteringConfig ----\n#' @title ClusteringConfig\n#'\n#'"
},
{
"path": "R/10_Clustering.R",
"chars": 1653,
"preview": "# S7_Clustering.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% Clustering ----\n#' @title Clustering\n#'\n#' @description\n#' C"
},
{
"path": "R/11_DecompositionConfig.R",
"chars": 15062,
"preview": "# S7_DecompositionConfig.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% DecompositionConfig ----\n#' @title DecompositionConf"
},
{
"path": "R/12_Decomposition.R",
"chars": 1990,
"preview": "# S7_Decomposition.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% Decomposition ----\n#' @title Decomposition\n#'\n#' @descript"
},
{
"path": "R/13_Themes.R",
"chars": 1346,
"preview": "# S7_Themes.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% Theme ----\n#' Theme\n#'\n#' @field name Character: Name of theme.\n#"
},
{
"path": "R/14_SuperConfig.R",
"chars": 13181,
"preview": "# S7_SuperConfig.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# References ----\n# https://github.com/RConsortium/S7\n# https://"
},
{
"path": "R/15_CheckData.R",
"chars": 10967,
"preview": "# CheckData.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% CheckData ----\n#' @author EDG\n#' @noRd\nCheckData <- new_class(\n "
},
{
"path": "R/16_S7utils.R",
"chars": 2601,
"preview": "# S7_utils\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% SuperWorkers ----\n#' @keywords internal\n#' @noRd\nSuperWorkers <- new"
},
{
"path": "R/algorithmDB.R",
"chars": 9559,
"preview": "# algorithmDB.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# Supervised Learning ----\nsupervised_algorithms <- data.frame(rbind"
},
{
"path": "R/calibrate.R",
"chars": 4950,
"preview": "# calibrate.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% calibrate.Classification ----\n#' Calibrate Binary Classification "
},
{
"path": "R/check_data.R",
"chars": 9213,
"preview": "# check_data.R\n# ::rtemis::\n# 2022- EDG rtemis.org\n\n# %% check_data ----\n#' Check Data\n#'\n#' @param x tabular data: Inpu"
},
{
"path": "R/check_input_data.R",
"chars": 5864,
"preview": "# check_supervised.R\n# ::rtemis::\n# EDG rtemis.org\n\n# Notes:\n# Some algorithms do not work with variable names containin"
},
{
"path": "R/cluster.R",
"chars": 1862,
"preview": "# cluster.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% cluster ----\n#' Perform Clustering\n#'\n#' Perform clustering on the "
},
{
"path": "R/cluster_CMeans.R",
"chars": 1026,
"preview": "# cluster_CMeans.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% cluster_.CMeansConfig ----\n#' C-means Clustering\n#'\n#' @key"
},
{
"path": "R/cluster_DBSCAN.R",
"chars": 1276,
"preview": "# cluster_DBSCAN.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% cluster_.DBSCANConfig ----\n#' Density-based spatial clusteri"
},
{
"path": "R/cluster_flexclust.R",
"chars": 2350,
"preview": "# cluster_KMeans.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% cluster_.KMeansConfig ----\n#' K-means Clustering\n#'\n#' @key"
},
{
"path": "R/data_xt_example.R",
"chars": 1031,
"preview": "#' Example longitudinal dataset\n#'\n#' A small synthetic dataset demonstrating various participation patterns\n#' in longi"
},
{
"path": "R/ddSci.R",
"chars": 2831,
"preview": "# ddSci.R\n# ::rtemis::\n# 2015- EDG rtemis.org\n\n#' Format Numbers for Printing\n#'\n#' 2 Decimal places, otherwise scientif"
},
{
"path": "R/ddb.R",
"chars": 6191,
"preview": "# ddb.R\n# ::rtemis::\n# 2022- EDG rtemis.org\n\n#' Read CSV using DuckDB\n#'\n#' Lazy-read a CSV file, optionally: filter row"
},
{
"path": "R/decomp.R",
"chars": 1448,
"preview": "# decomp.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp ----\n#' Perform Data Decomposition\n#'\n#' Perform linear or no"
},
{
"path": "R/decomp_ICA.R",
"chars": 834,
"preview": "# decom_ICA.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.ICAConfig ----\n#' ICA Decomposition\n#'\n#' @keywords inter"
},
{
"path": "R/decomp_Isomap.R",
"chars": 729,
"preview": "# decom_Isomap.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.IsomapConfig ----\n#' Isomap Decomposition\n#'\n#' @keywo"
},
{
"path": "R/decomp_NMF.R",
"chars": 1131,
"preview": "# decom_NMF.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.NMFConfig ----\n#' Non-negative Matrix Factorization (NMF)"
},
{
"path": "R/decomp_PCA.R",
"chars": 674,
"preview": "# decom_PCA.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.PCAConfig ----\n#' PCA Decomposition\n#'\n#' @keywords inter"
},
{
"path": "R/decomp_UMAP.R",
"chars": 1135,
"preview": "# decom_UMAP.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.UMAPConfig ----\n#' UMAP Decomposition\n#'\n#' @param x A n"
},
{
"path": "R/decomp_tSNE.R",
"chars": 804,
"preview": "# decom_tSNE.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% decomp_.tSNEConfig ----\n#' tSNE Decomposition\n#'\n#' @keywords in"
},
{
"path": "R/draw_3Dscatter.R",
"chars": 15732,
"preview": "# draw_3Dscatter.R\n# ::rtemis::\n# 2019- EDG rtemis.org\n\n#' Interactive 3D Scatter Plots\n#'\n#' Draw interactive 3D scatte"
},
{
"path": "R/draw_bar.R",
"chars": 12354,
"preview": "# draw_bar.R\n# ::rtemis::\n# 2019-22 EDG rtemis.org\n\n#' Interactive Barplots\n#'\n#' Draw interactive barplots using `plotl"
},
{
"path": "R/draw_box.R",
"chars": 36692,
"preview": "# draw_box.R\n# ::rtemis::\n# EDG rtemis.org\n\n#' Interactive Boxplots & Violin plots\n#'\n#' Draw interactive boxplots or vi"
},
{
"path": "R/draw_calibration.R",
"chars": 6206,
"preview": "# draw_calibration.R\n# ::rtemis::\n# 2023 EDG rtemis.org\n\n#' Draw calibration plot\n#'\n#' @param true_labels Factor or lis"
},
{
"path": "R/draw_confusion.R",
"chars": 12564,
"preview": "# draw_confusion.R\n# ::rtemis::\n# 2024- EDG rtemis.org\n\n#' Plot confusion matrix\n#'\n#' @param x `ClassificationMetrics` "
},
{
"path": "R/draw_dist.R",
"chars": 15787,
"preview": "# draw_dist.R\n# ::rtemis::\n# 2019- EDG rtemis.org\n\n# check whether list is reordered with ridge\n\n#' Draw Distributions u"
},
{
"path": "R/draw_graphd3.R",
"chars": 3175,
"preview": "# draw_graphD3\n# ::rtemis::\n# EDG rtemis.org\n\n#' Plot graph using \\pkg{networkD3}\n#'\n#' @param net \\pkg{igraph} network."
},
{
"path": "R/draw_graphjs.R",
"chars": 5128,
"preview": "# draw_graphjs.R\n# ::rtemis::\n# EDG rtemis.org\n\n#' Plot network using \\pkg{threejs::graphjs}\n#'\n#' Interactive plotting "
},
{
"path": "R/draw_heatmap.R",
"chars": 10445,
"preview": "# draw_heatmap.R\n# ::rtemis::\n# 2017 EDG rtemis.org\n\n#' Interactive Heatmaps\n#'\n#' Draw interactive heatmaps using `heat"
},
{
"path": "R/draw_leaflet.R",
"chars": 6580,
"preview": "# draw_leaflet.R\n# ::rtemis::\n# 2020 EDG rtemis.org\n\n#' Plot interactive choropleth map using \\pkg{leaflet}\n#'\n#' @param"
},
{
"path": "R/draw_pie.R",
"chars": 5290,
"preview": "# draw_pie.R\n# ::rtemis::\n# 2019 EDG rtemis.org\n\n#' Interactive Pie Chart\n#'\n#' Draw interactive pie charts using `plotl"
},
{
"path": "R/draw_protein.R",
"chars": 33595,
"preview": "# draw_protein\n# ::rtemis::\n# 2022- EDG rtemis.org\n\n#' Plot an amino acid sequence with annotations\n#'\n#' Plot an amino "
},
{
"path": "R/draw_pvals.R",
"chars": 1378,
"preview": "# draw_pvals.R\n# ::rtemis::\n# 2021 EDG rtemis.org\n\n#' Barplot p-values using [draw_bar]\n#'\n#' Plot 1 - p-values as a bar"
},
{
"path": "R/draw_roc.R",
"chars": 5278,
"preview": "# draw_roc.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n#' Draw ROC curve\n#'\n#' @param true_labels Factor: True outcome labels."
},
{
"path": "R/draw_scatter.R",
"chars": 26031,
"preview": "# draw_scatter.R\n# ::rtemis::\n# 2019- EDG rtemis.org\n\n#' Interactive Scatter Plots\n#'\n#' Draw interactive scatter plots "
},
{
"path": "R/draw_spectrogram.R",
"chars": 8267,
"preview": "# draw_spectrogram.R\n# ::rtemis::\n# 2023 EDG rtemis.org\n# https://plotly.com/r/heatmaps/\n\n#' Interactive Spectrogram\n#'\n"
},
{
"path": "R/draw_survfit.R",
"chars": 2619,
"preview": "# draw_survfit.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# draw_scatter(time, survival_prob, mode = \"lines\", line_shape = \"h"
},
{
"path": "R/draw_table.R",
"chars": 3605,
"preview": "# draw_table.R\n# ::rtemis::\n# 2019 EDG rtemis.org\n\n#' Simple HTML table\n#'\n#' Draw an html table using `plotly`\n#'\n#' @p"
},
{
"path": "R/draw_ts.R",
"chars": 7035,
"preview": "# draw_ts.R\n# ::rtemis::\n# 2022 EDG rtemis.org\n\n# => recalc limits for fn = \"sum\"\n\n#' Interactive Timeseries Plots\n#'\n#'"
},
{
"path": "R/draw_varimp.R",
"chars": 6512,
"preview": "# draw_varimp.R\n# ::rtemis::\n# 2017 EDG rtemis.org\n\n#' Interactive Variable Importance Plot\n#'\n#' Plot variable importan"
},
{
"path": "R/draw_volcano.R",
"chars": 13190,
"preview": "# draw_volcano\n# ::rtemis::\n# 2022 EDG rtemis.org\n# allow custom grouping\n\n# References\n# https://github.com/plotly/plot"
},
{
"path": "R/draw_xt.R",
"chars": 18693,
"preview": "# draw_xt.R\n# ::rtemis::\n# 2024 EDG rtemis.org\n\n# Multiple legends\n# https://plotly.com/python/legend/#adding-multiple-l"
},
{
"path": "R/fmt.R",
"chars": 14251,
"preview": "# fmt.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% fmt ----\n#' Text formatting\n#'\n#' Formats text with specified color, st"
},
{
"path": "R/ifw.R",
"chars": 1157,
"preview": "# ifw.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n#' Inverse Frequency Weighting\n#'\n#' @param y Vector: Outcome\n#' @param typ"
},
{
"path": "R/massGLM.R",
"chars": 3895,
"preview": "# massGLM.R\n# ::rtemis::\n# 2021- EDG rtemis.org\n\n#' Mass-univariate GLM Analysis\n#'\n#' @param x tabular data: Predictor "
},
{
"path": "R/metrics.R",
"chars": 17052,
"preview": "# metrics.R\n# ::rtemis::\n# 2019- EDG rtemis.org\n\n#' Error functions\n#'\n#' Convenience functions for calculating loss.\n#'"
},
{
"path": "R/msg.R",
"chars": 9949,
"preview": "# msg.R\n# ::rtemis::\n# 2016- EDG rtemis.org\n\n#' Get current date and time\n#'\n#' @details\n#' used by msgdatetime, log_to_"
},
{
"path": "R/preprocess.R",
"chars": 27470,
"preview": "# preprocess.R\n# ::rtemis::\n# 2017- EDG rtemis.org\n\n# %% preprocess(x, PreprocessorConfig, ...) ----\n#' @name\n#' preproc"
},
{
"path": "R/present.R",
"chars": 4107,
"preview": "# present.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n#' Present list of Supervised or SupervisedRes objects\n#'\n#' Plot traini"
},
{
"path": "R/read.R",
"chars": 8187,
"preview": "# read.R\n# ::rtemis::\n# 2022- EDG rtemis.org\n\n# %% read ----\n#' Read tabular data from a variety of formats\n#'\n#' Read d"
},
{
"path": "R/resample.R",
"chars": 9428,
"preview": "# resample.R\n# ::rtemis::\n# 2015- EDG rtemis.org\n\n#' Resample data\n#'\n#' Create resamples of your data, e.g. for model b"
},
{
"path": "R/rtemis-package.R",
"chars": 3042,
"preview": "# rtemis-package.R\n# ::rtemis::\n# 2015- EDG rtemis.org\n\n#' \\pkg{rtemis}: Advanced Machine Learning and Visualization\n#'\n"
},
{
"path": "R/rtemis_color_system.R",
"chars": 2065,
"preview": "#' rtemis Color System\n#'\n#' @author EDG\n#'\n#' @keywords internal\n#' @noRd\nrtemis_light_teal <- \"#00fdfd\"\nrtemis_light_b"
},
{
"path": "R/theme.R",
"chars": 32844,
"preview": "# theme.R\n# ::rtemis::\n# EDG rtemis.org\n\n# %% Black ----\n#' Themes for `draw_*` functions\n#'\n#' @param bg Color: Figure "
},
{
"path": "R/train.R",
"chars": 25203,
"preview": "# train.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train ----\n#' Train Supervised Learning Models\n#'\n#' @description\n#'"
},
{
"path": "R/train_CART.R",
"chars": 3721,
"preview": "# train_CART.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train_.CARTHyperparameters ----\n#' Train a CART decision tree\n#"
},
{
"path": "R/train_GAM.R",
"chars": 4661,
"preview": "# train_GAM.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% train_.GAMHyperparameters ----\n#' Train a GAM model\n#'\n#' Train a"
},
{
"path": "R/train_GLM.R",
"chars": 2940,
"preview": "# train_GLM.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% train_.GLMHyperparameters ----\n#' Train a GLM model\n#'\n#' Train a"
},
{
"path": "R/train_GLMNET.R",
"chars": 6971,
"preview": "# train_GLMNET.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train_.GLMNETHyperparameters ----\n#' Train a GLMNET model\n#'\n"
},
{
"path": "R/train_Isotonic.R",
"chars": 2400,
"preview": "# train_Isotonic.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train_.IsotonicHyperparameters ----\n#' Train an Isotonic mo"
},
{
"path": "R/train_LightCART.R",
"chars": 2323,
"preview": "# train_LightCART.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train_.LightCARTHyperparameters ----\n#' Decision Tree usin"
},
{
"path": "R/train_LightGBM.R",
"chars": 4345,
"preview": "# train_LightGBM.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# LightGBM parameters\n# https://lightgbm.readthedocs.io/en/latest"
},
{
"path": "R/train_LightRF.R",
"chars": 2842,
"preview": "# train_LightRF.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# References\n# LightGBM parameters: https://lightgbm.readthedocs.i"
},
{
"path": "R/train_LightRuleFit.R",
"chars": 6932,
"preview": "# train_LightRuleFit.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% train_.LightRuleFitHyperparameters ----\n#' Train a Light"
},
{
"path": "R/train_Ranger.R",
"chars": 6246,
"preview": "# train_Ranger.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# References\n# https://imbs-hl.github.io/ranger/reference/ranger.ht"
},
{
"path": "R/train_SVM.R",
"chars": 6447,
"preview": "# train_SVM.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n# %% train_.LinearSVMHyperparameters ----\n#' Train a Linear SVM model"
},
{
"path": "R/train_TabNet.R",
"chars": 2810,
"preview": "# train_TabNet.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% train_.TabNetHyperparameters ----\n#' Train a TabNet model\n#'\n#"
},
{
"path": "R/tune.R",
"chars": 1659,
"preview": "# tune.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% get_tuner_fn ----\n#' Get Tuner Function\n#'\n#' @param type Character: T"
},
{
"path": "R/tune_GridSearch.R",
"chars": 19596,
"preview": "# tune_GridSearch.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n# %% tune_GridSearch ----\n#' \\pkg{rtemis} internal: Grid Search "
},
{
"path": "R/utils.R",
"chars": 11714,
"preview": "# utils.R\n# ::rtemis::\n# 2016- EDG rtemis.org\n\n#' Print range of continuous variable\n#'\n#' @param x Numeric vector\n#' @p"
},
{
"path": "R/utils_art.R",
"chars": 9387,
"preview": "# utils_art.R\n# ::rtemis::\n# 2025- EDG rtemis.org\n\n#' Color columns of text art\n#'\n#' This function accepts text input o"
},
{
"path": "R/utils_async.R",
"chars": 3620,
"preview": "# utils_async.R\n# ::rtemis::\n# 2026- EDG rtemis.org\n\n# Define allowed future plans\nALLOWED_PLANS <- c(\n \"sequential\",\n "
},
{
"path": "R/utils_checks.R",
"chars": 12971,
"preview": "# utils_checks.R\n# ::rtemis::\n# 2024- EDG rtemis.org\n\n# clean_* functions performm checks and return clean inputs.\n# che"
},
{
"path": "R/utils_color.R",
"chars": 18803,
"preview": "# utils_color.R\n# ::rtemis::\n# 2016- EDG rtemis.org\n\n#' Simple Color Operations\n#'\n#' Invert a color or calculate the me"
},
{
"path": "R/utils_data.R",
"chars": 11516,
"preview": "# utils_data.R\n# ::rtemis::\n# EDG rtemis.org\n\n# %% Public --------------------------------------------------------------"
},
{
"path": "R/utils_data.table.R",
"chars": 19905,
"preview": "# utils_data.table.R\n# ::rtemis::\n# 2022- EDG rtemis.org\n\n#' Number of unique values per feature\n#'\n#' @param x data.tab"
},
{
"path": "R/utils_date.R",
"chars": 5772,
"preview": "# utils_date.R\n# ::rtemis::\n# 2024- EDG rtemis.org\n\n#' Extract features from dates\n#'\n#' @details weekday and month will"
},
{
"path": "R/utils_df.R",
"chars": 6294,
"preview": "# dataops\n# ::rtemis::\n# 2021 EDG rtemis.org\n\n#' Get names by string matching or class\n#'\n#' @details\n#' For `getnames()"
},
{
"path": "R/utils_exec.R",
"chars": 2938,
"preview": "# utils_exec.Ranger\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n#' Do call with tryCatch and suggestion\n#'\n#' @param fn Function"
},
{
"path": "R/utils_files.R",
"chars": 603,
"preview": "# utils_files.R\n# ::rtemis::\n# 2025 EDG rtemis.org\n\n#' Expand, normalize, concatenate, clean path\n#'\n#' @param ... Chara"
},
{
"path": "R/utils_html.R",
"chars": 881,
"preview": "# html_ops.R\n# ::rtemis::\n# 2023- EDG rtemis.org\n\n#' @keywords internal\n#' @noRd\nhtml_highlight <- function(..., bold = "
},
{
"path": "R/utils_io.R",
"chars": 5676,
"preview": "# utils_io.R\n# ::rtemis::\n# 2022 EDG rtemis.org\n\n#' Write \\pkg{rtemis} model to RDS file\n#'\n#' @param object `Supervised"
},
{
"path": "R/utils_lightgbm.R",
"chars": 9361,
"preview": "# utils_lightgbm.R\n# ::rtemis::\n# 2023- EDG rtemis.org\n\n# %% prepare_lgb_data ----\n#' Prepare data for LightGBM-based le"
},
{
"path": "R/utils_palettes.R",
"chars": 43591,
"preview": "# palettes.R\n# ::rtemis::\n# 2016- EDG rtemis.org\n\n# Colors ----\n#' @keywords internal\n#' @noRd\nucsfCol <- c(\n Navy = \"#"
},
{
"path": "R/utils_plot.R",
"chars": 380,
"preview": "# plotops.R\n# ::rtemis::\n# 2020- EDG rtemis.org\n\n#' @keywords internal\n#' @noRd\ngetlim <- function(x, axs = c(\"r\", \"i\"),"
},
{
"path": "R/utils_plotly.R",
"chars": 4449,
"preview": "# utils_plotly\n# ::rtemis::\n# 2021- EDG rtemis.org\n\n# plotly_vline calls plotly_vline1 to create a list for one or more "
},
{
"path": "R/utils_print.R",
"chars": 27696,
"preview": "# print_ops.R\n# ::rtemis::\n# 2016-23 EDG rtemis.org\n\nis_common_struct <- function(x) {\n class(x)[1] %in%\n c(\n \""
},
{
"path": "R/utils_rt.R",
"chars": 4078,
"preview": "# info\n# ::rtemis::\n# 2016- EDG rtemis.org\n\n#' `rtemis-internals`: `intro`\n#'\n#' Intro\n#'\n#' Starts function execution t"
},
{
"path": "R/utils_rules.R",
"chars": 7420,
"preview": "# utils_rules.R\n# ::rtemis::\n# EDG rtemis.org\n\n#' Match Rules to Cases\n#'\n#' @param x Matrix / data frame: Input feature"
},
{
"path": "R/utils_strings.R",
"chars": 16067,
"preview": "# strng.R\n# ::rtemis::\n# 2022 EDG rtemis.org\n\n# General hilite function output bold + any color.\nhilite <- function(\n ."
},
{
"path": "R/utils_supervised.R",
"chars": 8436,
"preview": "# super_ops.R\n# ::rtemis::\n# 2024- EDG rtemis.org\n\nsupervised_type <- function(dat) {\n if (is.factor(outcome(dat))) {\n "
},
{
"path": "R/utils_uniprot.R",
"chars": 1121,
"preview": "# uniprot_get.R\n# ::rtemis::\n# 2022 E.D. Gennatas lambdamd.org\n\n#' Get protein sequence from UniProt\n#'\n#' @param access"
},
{
"path": "R/utils_xt.R",
"chars": 5863,
"preview": "# xtdescribe.R\n# ::rtemis::\n# 2024 EDG\n\n#' Describe longitudinal dataset\n#'\n#' This function emulates the `xtdescribe` f"
},
{
"path": "R/zzz.R",
"chars": 3073,
"preview": "# ▄▄▄▄ ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪ .▄▄ ·\n# ▀▄ █·•██ ▀▄.▀··██ ▐███▪██ ▐█ ▀.\n# ▐▀▀▀▄ ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄\n# ▐█• █ ▐█"
},
{
"path": "README.md",
"chars": 5200,
"preview": "[](https://CRAN.R-project.org/package=rtemis)\n[ example\n# ::rtemis::\n# 2025 EDG\n\nset.seed(2025)\n\n# Create a sma"
},
{
"path": "inst/CITATION",
"chars": 775,
"preview": "utils::bibentry(\n header = \"To cite rtemis in publications, please use:\",\n\n # - - - - - - - - - - - - - - - - - - - - "
},
{
"path": "inst/resources/rtemis.utf8",
"chars": 172,
"preview": "▄▄▄▄ ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪ .▄▄ ·\n▀▄ █·•██ ▀▄.▀··██ ▐███▪██ ▐█ ▀.\n▐▀▀▀▄ ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄\n▐█• █ ▐█▌·▐█▄▄▌█"
},
{
"path": "inst/resources/rtemis2.utf8",
"chars": 450,
"preview": " ██▀███ ▄▄▄█████▓▓█████ ███▄ ▄███▓ ██▓ ██████ \n▓██ ▒ ██▒▓ ██▒ ▓▒▓█ ▀ ▓██▒▀█▀ ██▒▓██▒▒██ ▒ \n▓██ ░▄█ ▒▒ ▓██░ ▒░▒█"
},
{
"path": "man/available_algorithms.Rd",
"chars": 710,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/algorithmDB.R\n\\name{available_supervised}\n"
},
{
"path": "man/available_draw.Rd",
"chars": 443,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/algorithmDB.R\n\\name{available_draw}\n\\alias"
},
{
"path": "man/available_themes.Rd",
"chars": 374,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/theme.R\n\\name{available_themes}\n\\alias{ava"
},
{
"path": "man/calibrate.Rd",
"chars": 2485,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{calibrate}\n\\alias{calibr"
},
{
"path": "man/check_data.Rd",
"chars": 1004,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/check_data.R\n\\name{check_data}\n\\alias{chec"
},
{
"path": "man/choose_theme.Rd",
"chars": 1267,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/theme.R\n\\name{choose_theme}\n\\alias{choose_"
},
{
"path": "man/class_imbalance.Rd",
"chars": 715,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_supervised.R\n\\name{class_imbalance}\n"
},
{
"path": "man/classification_metrics.Rd",
"chars": 1642,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/metrics.R\n\\name{classification_metrics}\n\\a"
},
{
"path": "man/clean_colnames.Rd",
"chars": 818,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_strings.R\n\\name{clean_colnames}\n\\ali"
},
{
"path": "man/clean_names.Rd",
"chars": 724,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_strings.R\n\\name{clean_names}\n\\alias{"
},
{
"path": "man/cluster.Rd",
"chars": 762,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/cluster.R\n\\name{cluster}\n\\alias{cluster}\n\\"
},
{
"path": "man/col2grayscale.Rd",
"chars": 620,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_color.R\n\\name{col2grayscale}\n\\alias{"
},
{
"path": "man/color_adjust.Rd",
"chars": 800,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_color.R\n\\name{color_adjust}\n\\alias{c"
},
{
"path": "man/ddSci.Rd",
"chars": 1265,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ddSci.R\n\\name{ddSci}\n\\alias{ddSci}\n\\title{"
},
{
"path": "man/ddb_collect.Rd",
"chars": 806,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ddb.R\n\\name{ddb_collect}\n\\alias{ddb_collec"
},
{
"path": "man/ddb_data.Rd",
"chars": 2623,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ddb.R\n\\name{ddb_data}\n\\alias{ddb_data}\n\\ti"
},
{
"path": "man/decomp.Rd",
"chars": 743,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/decomp.R\n\\name{decomp}\n\\alias{decomp}\n\\tit"
},
{
"path": "man/describe.Rd",
"chars": 1175,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{describe}\n\\alias{describ"
},
{
"path": "man/df_movecolumn.Rd",
"chars": 567,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_df.R\n\\name{df_movecolumn}\n\\alias{df_"
},
{
"path": "man/df_nunique_perfeat.Rd",
"chars": 561,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_df.R\n\\name{df_nunique_perfeat}\n\\alia"
},
{
"path": "man/dot-list_to_Hyperparameters.Rd",
"chars": 842,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/02_Hyperparameters.R\n\\name{.list_to_Hyperp"
},
{
"path": "man/dot-list_to_ResamplerConfig.Rd",
"chars": 1376,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/05_Resampler.R\n\\name{.list_to_ResamplerCon"
},
{
"path": "man/dot-list_to_TunerConfig.Rd",
"chars": 1011,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/06_Tuner.R\n\\name{.list_to_TunerConfig}\n\\al"
},
{
"path": "man/draw_3Dscatter.Rd",
"chars": 4418,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_3Dscatter.R\n\\name{draw_3Dscatter}\n\\al"
},
{
"path": "man/draw_bar.Rd",
"chars": 5021,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_bar.R\n\\name{draw_bar}\n\\alias{draw_bar"
},
{
"path": "man/draw_box.Rd",
"chars": 9208,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_box.R\n\\name{draw_box}\n\\alias{draw_box"
},
{
"path": "man/draw_calibration.Rd",
"chars": 2345,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_calibration.R\n\\name{draw_calibration}"
},
{
"path": "man/draw_confusion.Rd",
"chars": 2136,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_confusion.R\n\\name{draw_confusion}\n\\al"
},
{
"path": "man/draw_dist.Rd",
"chars": 6231,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_dist.R\n\\name{draw_dist}\n\\alias{draw_d"
},
{
"path": "man/draw_fit.Rd",
"chars": 1270,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_scatter.R\n\\name{draw_fit}\n\\alias{draw"
},
{
"path": "man/draw_graphD3.Rd",
"chars": 1414,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_graphd3.R\n\\name{draw_graphD3}\n\\alias{"
},
{
"path": "man/draw_graphjs.Rd",
"chars": 3316,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_graphjs.R\n\\name{draw_graphjs}\n\\alias{"
},
{
"path": "man/draw_heatmap.Rd",
"chars": 4534,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_heatmap.R\n\\name{draw_heatmap}\n\\alias{"
},
{
"path": "man/draw_leaflet.Rd",
"chars": 3066,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_leaflet.R\n\\name{draw_leaflet}\n\\alias{"
},
{
"path": "man/draw_pie.Rd",
"chars": 2608,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_pie.R\n\\name{draw_pie}\n\\alias{draw_pie"
},
{
"path": "man/draw_protein.Rd",
"chars": 9334,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_protein.R\n\\name{draw_protein}\n\\alias{"
},
{
"path": "man/draw_pvals.Rd",
"chars": 1057,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_pvals.R\n\\name{draw_pvals}\n\\alias{draw"
},
{
"path": "man/draw_roc.Rd",
"chars": 2479,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_roc.R\n\\name{draw_roc}\n\\alias{draw_roc"
},
{
"path": "man/draw_scatter.Rd",
"chars": 8361,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_scatter.R\n\\name{draw_scatter}\n\\alias{"
},
{
"path": "man/draw_spectrogram.Rd",
"chars": 4042,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_spectrogram.R\n\\name{draw_spectrogram}"
},
{
"path": "man/draw_survfit.Rd",
"chars": 1991,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_survfit.R\n\\name{draw_survfit}\n\\alias{"
},
{
"path": "man/draw_table.Rd",
"chars": 1755,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_table.R\n\\name{draw_table}\n\\alias{draw"
},
{
"path": "man/draw_ts.Rd",
"chars": 4004,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_ts.R\n\\name{draw_ts}\n\\alias{draw_ts}\n\\"
},
{
"path": "man/draw_varimp.Rd",
"chars": 2287,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_varimp.R\n\\name{draw_varimp}\n\\alias{dr"
},
{
"path": "man/draw_volcano.Rd",
"chars": 5626,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_volcano.R\n\\name{draw_volcano}\n\\alias{"
},
{
"path": "man/draw_xt.Rd",
"chars": 7285,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/draw_xt.R\n\\name{draw_xt}\n\\alias{draw_xt}\n\\"
},
{
"path": "man/dt_describe.Rd",
"chars": 963,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_describe}\n\\ali"
},
{
"path": "man/dt_inspect_types.Rd",
"chars": 807,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_inspect_types}"
},
{
"path": "man/dt_keybin_reshape.Rd",
"chars": 1233,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_keybin_reshape"
},
{
"path": "man/dt_merge.Rd",
"chars": 1408,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_merge}\n\\alias{"
},
{
"path": "man/dt_names_by_attr.Rd",
"chars": 977,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_names_by_attr}"
},
{
"path": "man/dt_nunique_perfeat.Rd",
"chars": 791,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_nunique_perfea"
},
{
"path": "man/dt_pctmatch.Rd",
"chars": 971,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_pctmatch}\n\\ali"
},
{
"path": "man/dt_pctmissing.Rd",
"chars": 560,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_pctmissing}\n\\a"
},
{
"path": "man/dt_set_autotypes.Rd",
"chars": 1428,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_set_autotypes}"
},
{
"path": "man/dt_set_clean_all.Rd",
"chars": 908,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_set_clean_all}"
},
{
"path": "man/dt_set_cleanfactorlevels.Rd",
"chars": 883,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_set_cleanfacto"
},
{
"path": "man/dt_set_logical2factor.Rd",
"chars": 1737,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.table.R\n\\name{dt_set_logical2fa"
},
{
"path": "man/dt_set_one_hot.Rd",
"chars": 804,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/preprocess.R\n\\name{dt_set_one_hot}\n\\alias{"
},
{
"path": "man/exc.Rd",
"chars": 504,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{exc}\n\\alias{exc}\n\\title{"
},
{
"path": "man/feature_matrix.Rd",
"chars": 761,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/preprocess.R\n\\name{feature_matrix}\n\\alias{"
},
{
"path": "man/feature_names.Rd",
"chars": 561,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{feature_names}\n\\alias{fe"
},
{
"path": "man/features.Rd",
"chars": 642,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{features}\n\\alias{feature"
},
{
"path": "man/get_factor_names.Rd",
"chars": 543,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{get_factor_names}\n\\alias"
},
{
"path": "man/get_mode.Rd",
"chars": 755,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils.R\n\\name{get_mode}\n\\alias{get_mode}\n\\"
},
{
"path": "man/get_msg_sink.Rd",
"chars": 446,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/msg.R\n\\name{get_msg_sink}\n\\alias{get_msg_s"
},
{
"path": "man/get_palette.Rd",
"chars": 925,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_palettes.R\n\\name{get_palette}\n\\alias"
},
{
"path": "man/getnames.Rd",
"chars": 1281,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_df.R\n\\name{getnames}\n\\alias{getnames"
},
{
"path": "man/getnamesandtypes.Rd",
"chars": 462,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_df.R\n\\name{getnamesandtypes}\n\\alias{"
},
{
"path": "man/grapes-BC-grapes.Rd",
"chars": 406,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/preprocess.R\n\\name{\\%BC\\%}\n\\alias{\\%BC\\%}\n"
},
{
"path": "man/inc.Rd",
"chars": 541,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{inc}\n\\alias{inc}\n\\title{"
},
{
"path": "man/index_col_by_attr.Rd",
"chars": 1025,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.R\n\\name{index_col_by_attr}\n\\ali"
},
{
"path": "man/init_project_dir.Rd",
"chars": 703,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils.R\n\\name{init_project_dir}\n\\alias{ini"
},
{
"path": "man/inspect.Rd",
"chars": 402,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/00_S7init.R\n\\name{inspect}\n\\alias{inspect}"
},
{
"path": "man/inspect_type.Rd",
"chars": 1138,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils_data.R\n\\name{inspect_type}\n\\alias{in"
},
{
"path": "man/is_constant.Rd",
"chars": 503,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils.R\n\\name{is_constant}\n\\alias{is_const"
}
]
// ... and 96 more files (download for full content)
About this extraction
This page contains the full source code of the egenn/rtemis GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 296 files (1.4 MB), approximately 433.6k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.