Repository: egenn/rtemis
Branch: main
Commit: ee040e1ef75f
Files: 296
Total size: 1.4 MB

Directory structure:
gitextract_jpidflln/

├── .Rbuildignore
├── .github/
│   ├── .gitignore
│   ├── CONTRIBUTING.md
│   └── workflows/
│       └── R-CMD-check.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── NEWS.md
├── R/
│   ├── 00_S7init.R
│   ├── 01_ExecutionConfig.R
│   ├── 02_Hyperparameters.R
│   ├── 03_Metrics.R
│   ├── 04_Preprocessor.R
│   ├── 05_Resampler.R
│   ├── 06_Tuner.R
│   ├── 07_Supervised.R
│   ├── 08_MassUni.R
│   ├── 09_ClusteringConfig.R
│   ├── 10_Clustering.R
│   ├── 11_DecompositionConfig.R
│   ├── 12_Decomposition.R
│   ├── 13_Themes.R
│   ├── 14_SuperConfig.R
│   ├── 15_CheckData.R
│   ├── 16_S7utils.R
│   ├── algorithmDB.R
│   ├── calibrate.R
│   ├── check_data.R
│   ├── check_input_data.R
│   ├── cluster.R
│   ├── cluster_CMeans.R
│   ├── cluster_DBSCAN.R
│   ├── cluster_flexclust.R
│   ├── data_xt_example.R
│   ├── ddSci.R
│   ├── ddb.R
│   ├── decomp.R
│   ├── decomp_ICA.R
│   ├── decomp_Isomap.R
│   ├── decomp_NMF.R
│   ├── decomp_PCA.R
│   ├── decomp_UMAP.R
│   ├── decomp_tSNE.R
│   ├── draw_3Dscatter.R
│   ├── draw_bar.R
│   ├── draw_box.R
│   ├── draw_calibration.R
│   ├── draw_confusion.R
│   ├── draw_dist.R
│   ├── draw_graphd3.R
│   ├── draw_graphjs.R
│   ├── draw_heatmap.R
│   ├── draw_leaflet.R
│   ├── draw_pie.R
│   ├── draw_protein.R
│   ├── draw_pvals.R
│   ├── draw_roc.R
│   ├── draw_scatter.R
│   ├── draw_spectrogram.R
│   ├── draw_survfit.R
│   ├── draw_table.R
│   ├── draw_ts.R
│   ├── draw_varimp.R
│   ├── draw_volcano.R
│   ├── draw_xt.R
│   ├── fmt.R
│   ├── ifw.R
│   ├── massGLM.R
│   ├── metrics.R
│   ├── msg.R
│   ├── preprocess.R
│   ├── present.R
│   ├── read.R
│   ├── resample.R
│   ├── rtemis-package.R
│   ├── rtemis_color_system.R
│   ├── theme.R
│   ├── train.R
│   ├── train_CART.R
│   ├── train_GAM.R
│   ├── train_GLM.R
│   ├── train_GLMNET.R
│   ├── train_Isotonic.R
│   ├── train_LightCART.R
│   ├── train_LightGBM.R
│   ├── train_LightRF.R
│   ├── train_LightRuleFit.R
│   ├── train_Ranger.R
│   ├── train_SVM.R
│   ├── train_TabNet.R
│   ├── tune.R
│   ├── tune_GridSearch.R
│   ├── utils.R
│   ├── utils_art.R
│   ├── utils_async.R
│   ├── utils_checks.R
│   ├── utils_color.R
│   ├── utils_data.R
│   ├── utils_data.table.R
│   ├── utils_date.R
│   ├── utils_df.R
│   ├── utils_exec.R
│   ├── utils_files.R
│   ├── utils_html.R
│   ├── utils_io.R
│   ├── utils_lightgbm.R
│   ├── utils_palettes.R
│   ├── utils_plot.R
│   ├── utils_plotly.R
│   ├── utils_print.R
│   ├── utils_rt.R
│   ├── utils_rules.R
│   ├── utils_strings.R
│   ├── utils_supervised.R
│   ├── utils_uniprot.R
│   ├── utils_xt.R
│   └── zzz.R
├── README.md
├── data/
│   └── xt_example.rda
├── data-raw/
│   └── create_xt_example.R
├── inst/
│   ├── CITATION
│   ├── extdata/
│   │   ├── us-counties.rds
│   │   └── us-states.rds
│   └── resources/
│       ├── aminoacids.rds
│       ├── rtemis.utf8
│       └── rtemis2.utf8
├── man/
│   ├── available_algorithms.Rd
│   ├── available_draw.Rd
│   ├── available_themes.Rd
│   ├── calibrate.Rd
│   ├── check_data.Rd
│   ├── choose_theme.Rd
│   ├── class_imbalance.Rd
│   ├── classification_metrics.Rd
│   ├── clean_colnames.Rd
│   ├── clean_names.Rd
│   ├── cluster.Rd
│   ├── col2grayscale.Rd
│   ├── color_adjust.Rd
│   ├── ddSci.Rd
│   ├── ddb_collect.Rd
│   ├── ddb_data.Rd
│   ├── decomp.Rd
│   ├── describe.Rd
│   ├── df_movecolumn.Rd
│   ├── df_nunique_perfeat.Rd
│   ├── dot-list_to_Hyperparameters.Rd
│   ├── dot-list_to_ResamplerConfig.Rd
│   ├── dot-list_to_TunerConfig.Rd
│   ├── draw_3Dscatter.Rd
│   ├── draw_bar.Rd
│   ├── draw_box.Rd
│   ├── draw_calibration.Rd
│   ├── draw_confusion.Rd
│   ├── draw_dist.Rd
│   ├── draw_fit.Rd
│   ├── draw_graphD3.Rd
│   ├── draw_graphjs.Rd
│   ├── draw_heatmap.Rd
│   ├── draw_leaflet.Rd
│   ├── draw_pie.Rd
│   ├── draw_protein.Rd
│   ├── draw_pvals.Rd
│   ├── draw_roc.Rd
│   ├── draw_scatter.Rd
│   ├── draw_spectrogram.Rd
│   ├── draw_survfit.Rd
│   ├── draw_table.Rd
│   ├── draw_ts.Rd
│   ├── draw_varimp.Rd
│   ├── draw_volcano.Rd
│   ├── draw_xt.Rd
│   ├── dt_describe.Rd
│   ├── dt_inspect_types.Rd
│   ├── dt_keybin_reshape.Rd
│   ├── dt_merge.Rd
│   ├── dt_names_by_attr.Rd
│   ├── dt_nunique_perfeat.Rd
│   ├── dt_pctmatch.Rd
│   ├── dt_pctmissing.Rd
│   ├── dt_set_autotypes.Rd
│   ├── dt_set_clean_all.Rd
│   ├── dt_set_cleanfactorlevels.Rd
│   ├── dt_set_logical2factor.Rd
│   ├── dt_set_one_hot.Rd
│   ├── exc.Rd
│   ├── feature_matrix.Rd
│   ├── feature_names.Rd
│   ├── features.Rd
│   ├── get_factor_names.Rd
│   ├── get_mode.Rd
│   ├── get_msg_sink.Rd
│   ├── get_palette.Rd
│   ├── getnames.Rd
│   ├── getnamesandtypes.Rd
│   ├── grapes-BC-grapes.Rd
│   ├── inc.Rd
│   ├── index_col_by_attr.Rd
│   ├── init_project_dir.Rd
│   ├── inspect.Rd
│   ├── inspect_type.Rd
│   ├── is_constant.Rd
│   ├── labelify.Rd
│   ├── massGLM.Rd
│   ├── matchcases.Rd
│   ├── mgetnames.Rd
│   ├── names_by_class.Rd
│   ├── one_hot2factor.Rd
│   ├── outcome.Rd
│   ├── outcome_name.Rd
│   ├── plot.MassGLM.Rd
│   ├── plot_manhattan.Rd
│   ├── plot_roc.Rd
│   ├── plot_true_pred.Rd
│   ├── plot_varimp.Rd
│   ├── preprocess.Rd
│   ├── preprocessed.Rd
│   ├── present.Rd
│   ├── previewcolor.Rd
│   ├── read.Rd
│   ├── read_config.Rd
│   ├── regression_metrics.Rd
│   ├── resample.Rd
│   ├── rnormmat.Rd
│   ├── rtemis-package.Rd
│   ├── rtemis_colors.Rd
│   ├── rtversion.Rd
│   ├── runifmat.Rd
│   ├── set_msg_sink.Rd
│   ├── set_outcome.Rd
│   ├── setdiffsym.Rd
│   ├── setup_CART.Rd
│   ├── setup_CMeans.Rd
│   ├── setup_DBSCAN.Rd
│   ├── setup_ExecutionConfig.Rd
│   ├── setup_GAM.Rd
│   ├── setup_GLM.Rd
│   ├── setup_GLMNET.Rd
│   ├── setup_GridSearch.Rd
│   ├── setup_HardCL.Rd
│   ├── setup_ICA.Rd
│   ├── setup_Isomap.Rd
│   ├── setup_Isotonic.Rd
│   ├── setup_KMeans.Rd
│   ├── setup_LightCART.Rd
│   ├── setup_LightGBM.Rd
│   ├── setup_LightRF.Rd
│   ├── setup_LightRuleFit.Rd
│   ├── setup_LinearSVM.Rd
│   ├── setup_NMF.Rd
│   ├── setup_NeuralGas.Rd
│   ├── setup_PCA.Rd
│   ├── setup_Preprocessor.Rd
│   ├── setup_RadialSVM.Rd
│   ├── setup_Ranger.Rd
│   ├── setup_Resampler.Rd
│   ├── setup_SuperConfig.Rd
│   ├── setup_SuperConfigLive.Rd
│   ├── setup_TabNet.Rd
│   ├── setup_UMAP.Rd
│   ├── setup_tSNE.Rd
│   ├── size.Rd
│   ├── table_column_attr.Rd
│   ├── theme.Rd
│   ├── to_json.Rd
│   ├── train.Rd
│   ├── uniprot_get.Rd
│   ├── with_msg_sink.Rd
│   ├── write_toml.Rd
│   ├── xt_example.Rd
│   └── xtdescribe.Rd
└── tests/
    ├── testthat/
    │   ├── test_Calibration.R
    │   ├── test_CheckData.R
    │   ├── test_Clustering.R
    │   ├── test_Decomposition.R
    │   ├── test_ExecutionConfig.R
    │   ├── test_Hyperparameters.R
    │   ├── test_Metrics.R
    │   ├── test_Preprocessor.R
    │   ├── test_Resampler.R
    │   ├── test_SuperConfig.R
    │   ├── test_SuperConfigLive.R
    │   ├── test_Supervised.R
    │   ├── test_Theme.R
    │   ├── test_Tuner.R
    │   ├── test_checks.R
    │   ├── test_colorsystem.R
    │   ├── test_draw.R
    │   ├── test_idx.R
    │   ├── test_massGLM.R
    │   ├── test_msg_sink.R
    │   ├── test_strings.R
    │   └── test_to_json.R
    └── testthat.R

================================================
FILE CONTENTS
================================================

================================================
FILE: .Rbuildignore
================================================
^__dev$
^__validation$
^_pkgdown\.yml$
^.*\.code-workspace$
^.*\.Rcheck$
^.*\.tar.gz$
^[.]?air[.]toml$
^\.claude$
^\.DS_Store$
^\.gemini$
^\.github$
^\.lintr$
^\.rtms-instructions\.md$
^\.vscode$
^cran-comments\.md$
^LICENSE\.md$
^data-raw$
^dev$
^docs$
^specs$
^AGENTS\.md$
^NEWS\.md$
^pkgdown$
^Makefile$
^SKILL\.md$


================================================
FILE: .github/.gitignore
================================================
*.html


================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to rtemis

Thank you for your interest in contributing to **rtemis**! This guide will help you report issues effectively.

## Before Opening an Issue

### Update to Latest Version

Ensure you're using the latest version of rtemis (v0.99+). Many issues may already be fixed in recent updates.

```r
# Install from CRAN
install.packages("rtemis")

# Install from GitHub
pak::pak("rtemis-org/rtemis")

# Install from r-universe
install.packages('rtemis', repos = 'https://rtemis-org.r-universe.dev')

# Check your version
packageVersion("rtemis")
```

### Check Existing Issues

Please search [existing issues](https://github.com/rtemis-org/rtemis/issues) to see if your problem or suggestion has already been reported. If you find a related issue, add a comment with any additional information.

### Review Documentation

- **API Documentation**: https://docs.rtemis.org/r/ml-api/
- **General Documentation**: https://docs.rtemis.org/r/ml

## Opening an Issue

### Issue Types

We welcome the following types of issues:

1. **🐛 Bug Reports**: Unexpected behavior, errors, or crashes. (Use `[BUG]` in the title)
2. **✨ Feature Requests**: Ideas for new functionality. (Use `[FEATURE]` in the title)
3. **📚 Documentation**: Improvements to docs or examples. (Use `[DOC]` in the title)
4. **❓ Questions**: Use [Discussions](https://github.com/rtemis-org/rtemis/discussions) for usage questions

### Bug Reports

A good bug report should include:

#### Required Information

1. **rtemis version**: Output of `packageVersion("rtemis")`
2. **R version**: Output of `R.version.string`
3. **Operating System**: e.g., macOS 14.5, Ubuntu 22.04, Windows 11
4. **Clear description**: What did you expect vs. what actually happened?

#### Reproducible Example

**Critical**: Provide a minimal reproducible example. Use the template below:

```r
# Load required packages
library(rtemis)
library(data.table)  # if needed

# Create minimal data
set.seed(2025)
n <- 100
x <- rnormmat(n, 3)
y <- x[, 1] + x[, 2] + rnorm(n)
dat <- data.frame(x, y)

# Demonstrate the issue
mod <- train(
  x = dat,
  algorithm = "glm"
)

# Expected: Model trains successfully
# Actual: Error message...
```

#### Error Messages

Include **complete error messages** with full stack traces. If the error is verbose, use a code block:

```
Error in train(...):
! You must define either `hyperparameters` or `algorithm`.
```

#### Session Info (for complex issues)

For crashes or environment-specific issues, include:

```r
sessionInfo()
```

### Feature Requests

For feature requests, please describe:

1. **Use case**: What problem would this solve?
2. **Proposed solution**: How should it work?
3. **Alternatives considered**: What workarounds exist currently?
4. **Impact**: Who would benefit from this feature?

**Example:**

> **Use case**: I frequently need to train models with time-series cross-validation but the current resampling methods don't preserve temporal order.
>
> **Proposed solution**: Add `setup_TimeSeriesCV()` that creates train/test splits respecting time ordering.
>
> **Alternatives**: Currently using custom resampling with `outer_resampling` parameter, but it's verbose and error-prone.

### Documentation Issues

For documentation improvements:

1. **Location**: Specify which page or function (e.g., `?train`, `?setup_GLMNET`)
2. **Problem**: What's unclear, incorrect, or missing?
3. **Suggestion**: How could it be improved?

## Version-Specific Notes

### rtemis 0.99+ vs. rtemisalpha (Legacy)

**Important**: This repository contains **rtemis 0.99+**, a complete rewrite using S7 classes. If you're using the legacy version (`rtemisalpha`), please note:

- Legacy issues should reference [rtemis-legacy](https://github.com/rtemis-org/rtemis-legacy) (unmaintained)
- Migration questions are welcome here
- API differences are expected (see README.md for major changes)

### Active Development

rtemis 0.99+ is under active development. Features may change between releases. When reporting issues:

- Specify your branch if not using `main` (check with `git branch`)
- Note if the issue appears in a specific algorithm (some are being ported from the legacy version)

## What Happens Next?

1. **Triage**: Maintainers will review and label your issue
2. **Discussion**: We may ask for clarification or additional details
3. **Resolution**: 
   - **Bugs**: Fixed in upcoming releases, referenced in commit messages
   - **Features**: Evaluated for inclusion in roadmap
   - **Questions**: Answered or redirected to appropriate resources

## Code of Conduct

Be respectful and constructive. We're all here to improve rtemis together.

## Pull Requests

While this guide focuses on issues, pull requests are welcome! Key points:

- Discuss major changes in an issue first
- Follow existing code style (S7 classes, roxygen2 documentation)
- All `@param` must follow format: `Class: Description ending with period.`
- Include tests for new functionality
- Update documentation as needed

## Questions?

- **General usage**: [GitHub Discussions](https://github.com/rtemis-org/rtemis/discussions)
- **Bug reports/features**: [GitHub Issues](https://github.com/rtemis-org/rtemis/issues)
- **Security issues**: Contact maintainers directly (see DESCRIPTION file)

---

Thank you for contributing to rtemis.


================================================
FILE: .github/workflows/R-CMD-check.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help

on:
  pull_request:
  push:
    branches: [main]
  workflow_dispatch:

name: R-CMD-check

permissions: read-all

concurrency:
  group: R-CMD-check-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  R-CMD-check:
    runs-on: ubuntu-latest

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-pandoc@v2
      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true
      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::rcmdcheck
          needs: check
      - uses: r-lib/actions/check-r-package@v2
        with:
          upload-snapshots: true
          build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")'
          error-on: '"note"'


================================================
FILE: .gitignore
================================================
# Dev
__dev/
__validation/
dev/
__out/
specs/

# Mac OS
.DS_Store

# VS Code
.vscode
*.code-workspace

# R History files
.Rhistory
.Rapp.history

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# pkgdown
_pkgdown.yml
pkgdown/

# Air
air.toml

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# lintr
.lintr

# CRAN
cran-comments.md

# Manual
*.pdf

# Assistants
AGENTS.md
.claude/
SKILL.md

================================================
FILE: DESCRIPTION
================================================
Package: rtemis
Version: 1.2.0
Title: Machine Learning and Visualization
Date: 2026-05-12
Authors@R: person(given = "E.D.", family = "Gennatas", role = c("aut", "cre", "cph"), 
    email = "gennatas@gmail.com", comment = c(ORCID = "0000-0001-9280-3609"))
Description: Machine learning and visualization package with an 'S7' backend 
    featuring comprehensive type checking and validation, paired with an efficient functional
    user-facing API. train(), cluster(), and decomp() provide one-call access to supervised and 
    unsupervised learning. All configuration steps are performed using setup functions and 
    validated. A single call to train() handles preprocessing, hyperparameter tuning, and testing 
    with nested resampling. Supports 'data.frame', 'data.table', and 'tibble' inputs, parallel 
    execution, and interactive visualizations. The package first appeared in E.D. Gennatas (2017)
    <https://repository.upenn.edu/entities/publication/d81892ea-3087-4b71-a6f5-739c58626d64>.
License: GPL (>= 3)
URL: https://www.rtemis.org, https://docs.rtemis.org/r/ml, https://docs.rtemis.org/r/ml-api/
BugReports: https://github.com/rtemis-org/rtemis/issues
ByteCompile: yes
Depends:
    R (>= 4.1.0)
Imports:
    grDevices,
    graphics,
    stats,
    methods,
    utils,
    S7,
    data.table,
    future,
    htmltools,
    cli
Suggests:
    arrow,
    bit64,
    car,
    colorspace,
    DBI,
    dbscan,
    dendextend (>= 0.18.0),
    duckdb,
    e1071,
    farff,
    fastICA,
    flexclust,
    future.apply,
    future.mirai,
    futurize,
    geosphere,
    ggplot2,
    glmnet,
    geojsonio,
    glue,
    grid,
    gsubfn,
    haven,
    heatmaply,
    htmlwidgets,
    igraph,
    jsonlite,
    later,
    leaflet,
    leaps,
    lightAUC,
    lightgbm,
    matrixStats,
    mgcv,
    mice,
    mirai,
    missRanger,
    nanonext,
    networkD3,
    NMF,
    openxlsx,
    parallelly,
    partykit,
    plotly,
    plumber,
    pROC,
    progressr,
    psych,
    pvclust,
    ranger,
    reactable,
    readxl,
    reticulate,
    ROCR,
    rpart,
    Rtsne,
    seqinr,
    sf,
    shapr,
    survival,
    tabnet,
    threejs,
    testthat (>= 3.0.0),
    tibble,
    timeDate,
    toml,
    torch,
    uwot,
    vegan,
    vroom,
    withr
Encoding: UTF-8
Config/testthat/edition: 3
Roxygen: list(markdown = TRUE)
LazyData: true
Config/roxygen2/version: 8.0.0


================================================
FILE: LICENSE.md
================================================
                    GNU GENERAL PUBLIC LICENSE
                       Version 3, 29 June 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU General Public License is a free, copyleft license for
software and other kinds of works.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.  We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors.  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights.  Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received.  You must make sure that they, too, receive
or can get the source code.  And you must show them these terms so they
know their rights.

  Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.

  For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software.  For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.

  Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so.  This is fundamentally incompatible with the aim of
protecting users' freedom to change the software.  The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable.  Therefore, we
have designed this version of the GPL to prohibit the practice for those
products.  If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.

  Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary.  To prevent this, the GPL assures that
patents cannot be used to render the program non-free.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Use with the GNU Affero General Public License.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:

    <program>  Copyright (C) <year>  <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.

  The GNU General Public License does not permit incorporating your program
into proprietary programs.  If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.  But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.


================================================
FILE: Makefile
================================================
PKG := $(shell awk '/^Package:/{print $$2; exit}' DESCRIPTION)
R ?= R
RSCRIPT ?= Rscript
CHECK_DIR := $(PKG).Rcheck
TARBALL_GLOB := $(PKG)_*.tar.gz

msg = @printf '\033[38;2;108;163;160m[%s] %s\033[0m\n' "$$(date -u '+%Y-%m-%d %H:%M:%SZ')" "$(1)"

.DEFAULT_GOAL := help

.PHONY: help format document install test build check check-cran check-cran-no-tests site clean

help:
	$(call msg,Available targets:)
	@printf '%s\n' \
		'  format      Format R code with air CLI (if available)' \
		'  document    Generate roxygen2 documentation' \
		'  install     Document and install the package locally with pak' \
		'  test        Run testthat::test_local(stop_on_failure = TRUE)' \
		'  build       Build the source tarball' \
		'  check       Run R CMD check on the built tarball' \
		'  check-cran  Run R CMD check --as-cran' \
		'  check-cran-no-tests  Run R CMD check --as-cran --no-tests' \
		'  manual      Build package manual' \
		'  site        Build pkgdown site' \
		'  clean       Remove tarballs and .Rcheck output'

format:
	$(call msg,─── Formatting $(PKG) package... ───)
	@if command -v air >/dev/null 2>&1; then \
		air format .; \
	else \
		echo "   Note: 'air' CLI not found — skipping R code formatting."; \
	fi
	$(call msg,Done)

document: format
	$(call msg,─── Documenting $(PKG) package... ───)
	$(RSCRIPT) -e "roxygen2::roxygenize()"
	$(call msg,Done)

install: document
	$(call msg,─── Installing $(PKG) package... ───)
	$(RSCRIPT) -e "pak::local_install(upgrade = TRUE)"
	$(call msg,Done)

test:
	$(call msg,─── Running testthat tests for $(PKG)... ───)
	$(RSCRIPT) -e "testthat::test_local(stop_on_failure = TRUE)"
	$(call msg,Done)

build: clean
	$(call msg,─── Building $(PKG) package... ───)
	$(R) CMD build .
	$(call msg,Done)

check: build
	$(call msg,─── Running R CMD check on $(PKG)... ───)
	$(R) CMD check $(TARBALL_GLOB)
	rm -f $(TARBALL_GLOB)
	$(call msg,Done)

check-cran: build
	$(call msg,─── Running R CMD check --as-cran on $(PKG)... ───)
	$(R) CMD check $(TARBALL_GLOB) --as-cran
	rm -f $(TARBALL_GLOB)
	$(call msg,Done)

check-cran-no-tests: build
	$(call msg,─── Running R CMD check --as-cran on $(PKG)... ───)
	$(R) CMD check $(TARBALL_GLOB) --as-cran --no-tests
	rm -f $(TARBALL_GLOB)
	$(call msg,Done)

manual:
	$(call msg,─── Building manual for $(PKG)... ───)
	$(R) CMD Rd2pdf . --output=$(PKG).pdf
	$(call msg,Done)

site:
	$(call msg,─── Building pkgdown site for $(PKG)... ───)
	$(RSCRIPT) -e "pkgdown::build_site()"
	$(call msg,Done)

clean:
	$(call msg,─── Cleaning build artifacts... ───)
	rm -rf $(CHECK_DIR)
	rm -f $(TARBALL_GLOB)
	$(call msg,Done)


================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand

S3method(plot,MassGLM)
export("%BC%")
export(.list_to_Hyperparameters)
export(.list_to_ResamplerConfig)
export(.list_to_TunerConfig)
export(available_clustering)
export(available_decomposition)
export(available_draw)
export(available_supervised)
export(available_themes)
export(calibrate)
export(check_data)
export(choose_theme)
export(class_imbalance)
export(classification_metrics)
export(clean_colnames)
export(clean_names)
export(cluster)
export(col2grayscale)
export(color_adjust)
export(ddSci)
export(ddb_collect)
export(ddb_data)
export(decomp)
export(describe)
export(df_movecolumn)
export(df_nunique_perfeat)
export(draw_3Dscatter)
export(draw_bar)
export(draw_box)
export(draw_calibration)
export(draw_confusion)
export(draw_dist)
export(draw_fit)
export(draw_graphD3)
export(draw_graphjs)
export(draw_heatmap)
export(draw_leaflet)
export(draw_pie)
export(draw_protein)
export(draw_pvals)
export(draw_roc)
export(draw_scatter)
export(draw_spectrogram)
export(draw_survfit)
export(draw_table)
export(draw_ts)
export(draw_varimp)
export(draw_volcano)
export(draw_xt)
export(dt_describe)
export(dt_inspect_types)
export(dt_keybin_reshape)
export(dt_merge)
export(dt_names_by_attr)
export(dt_nunique_perfeat)
export(dt_pctmatch)
export(dt_pctmissing)
export(dt_set_autotypes)
export(dt_set_clean_all)
export(dt_set_cleanfactorlevels)
export(dt_set_logical2factor)
export(dt_set_one_hot)
export(exc)
export(feature_matrix)
export(feature_names)
export(features)
export(get_factor_names)
export(get_mode)
export(get_msg_sink)
export(get_palette)
export(getcharacternames)
export(getdatenames)
export(getfactornames)
export(getlogicalnames)
export(getnames)
export(getnamesandtypes)
export(getnumericnames)
export(inc)
export(index_col_by_attr)
export(init_project_dir)
export(inspect)
export(inspect_type)
export(is_constant)
export(labelify)
export(massGLM)
export(matchcases)
export(mgetnames)
export(names_by_class)
export(one_hot2factor)
export(outcome)
export(outcome_name)
export(plot_manhattan)
export(plot_manhattan.MassGLM)
export(plot_roc)
export(plot_true_pred)
export(plot_varimp)
export(preprocess)
export(preprocess.class_tabular.Preprocessor)
export(preprocess.class_tabular.PreprocessorConfig)
export(preprocessed)
export(present)
export(previewcolor)
export(read)
export(read_config)
export(regression_metrics)
export(resample)
export(rnormmat)
export(rtemis_colors)
export(rtversion)
export(runifmat)
export(set_msg_sink)
export(set_outcome)
export(setdiffsym)
export(setup_CART)
export(setup_CMeans)
export(setup_DBSCAN)
export(setup_ExecutionConfig)
export(setup_GAM)
export(setup_GLM)
export(setup_GLMNET)
export(setup_GridSearch)
export(setup_HardCL)
export(setup_ICA)
export(setup_Isomap)
export(setup_Isotonic)
export(setup_KMeans)
export(setup_LightCART)
export(setup_LightGBM)
export(setup_LightRF)
export(setup_LightRuleFit)
export(setup_LinearSVM)
export(setup_NMF)
export(setup_NeuralGas)
export(setup_PCA)
export(setup_Preprocessor)
export(setup_RadialSVM)
export(setup_Ranger)
export(setup_Resampler)
export(setup_SuperConfig)
export(setup_SuperConfigLive)
export(setup_TabNet)
export(setup_UMAP)
export(setup_tSNE)
export(size)
export(table_column_attr)
export(theme_black)
export(theme_blackgrid)
export(theme_blackigrid)
export(theme_darkgray)
export(theme_darkgraygrid)
export(theme_darkgrayigrid)
export(theme_lightgraygrid)
export(theme_mediumgraygrid)
export(theme_white)
export(theme_whitegrid)
export(theme_whiteigrid)
export(to_json)
export(train)
export(uniprot_get)
export(with_msg_sink)
export(write_toml)
export(xtdescribe)
import(S7)
import(data.table)
import(grDevices)
import(graphics)
import(htmltools)
import(methods)
import(stats)
importFrom(utils,getFromNamespace)
importFrom(utils,head)
importFrom(utils,packageVersion)
importFrom(utils,sessionInfo)
importFrom(utils,tail)


================================================
FILE: NEWS.md
================================================
# rtemis news

## 1.0.0 First CRAN release

## 1.0.1

- Introduce `VariableImportance` S7 class to represent variable importance data, allowing for more than one measure of importance per model and update all relevant classes and methods.
- Calculate Partial_Effect_Variance as variable importance measure for GAM models
- Add `execution_config` argument to internal `train_` method and use it in LightRuleFit to propagate to LightGBM and GLMNET calls.

================================================
FILE: R/00_S7init.R
================================================
# S7_init.R
# ::rtemis::
# 2025- EDG rtemis.org

# References
# S7 generics: https://rconsortium.github.io/S7/articles/generics-methods.html

# %% --- S3 Classes for S7 ----------------------------------------------------------------------------
class_data.table <- new_S3_class("data.table")
class_lgb.Booster <- new_S3_class("lgb.Booster")
# All internal methods should support data.frame, data.table, tbl_df
class_tabular <- new_union(class_data.frame, class_data.table)
# Supervised learning model classes
class_glm <- new_S3_class("glm")
class_gam <- new_S3_class("gam")
class_glmnet <- new_S3_class("glmnet")
class_cv.glmnet <- new_S3_class("cv.glmnet")
class_stepfun <- new_S3_class("stepfun") # Isotonic regression
class_rpart <- new_S3_class("rpart")
class_ranger <- new_S3_class("ranger")
class_svm <- new_S3_class("svm")
class_tabnet_fit <- new_S3_class("tabnet_fit")


# %% --- Generics -------------------------------------------------------------------------------------
# %% repr ----
#' String representation
#'
#' @param x rtemis object.
#'
#' @return Character string representation of the object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
repr <- new_generic("repr", "x")


# %% inspect ----
#' Inspect rtemis object
#'
#' @param x R object to inspect.
#'
#' @return Called for side effect of printing information to console; returns character string
#' invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' inspect(iris)
inspect <- new_generic("inspect", "x", function(x) {
  S7_dispatch()
}) # /rtemis::inspect


# %% preprocess ----
#' @name
#' preprocess
#'
#' @title
#' Preprocess Data
#'
#' @description
#' Preprocess data for analysis and visualization.
#'
#' @details
#' Methods are provided for preprocessing training set data, which accepts a `PreprocessorConfig`
#' object, and for preprocessing validation and test set data, which accept a `Preprocessor`
#' object.
#'
#' @return `Preprocessor` object.
#'
#' @author EDG
#' @rdname preprocess
#' @export
#'
#' @examples
#' # Setup a `Preprocessor`: this outputs a `PreprocessorConfig` object.
#' prp <- setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE)
#'
#' # Includes a long list of parameters
#' prp
#'
#' # Resample iris to get train and test data
#' res <- resample(iris, setup_Resampler(seed = 2026))
#' iris_train <- iris[res[[1]], ]
#' iris_test <- iris[-res[[1]], ]
#'
#' # Preprocess training data
#' iris_pre <- preprocess(iris_train, prp)
#'
#' # Access preprocessd training data with `preprocessed()`
#' preprocessed(iris_pre)
#'
#' # Apply the same preprocessing to test data
#' # In this case, the scale and center values from training data will be used.
#' # Note how `preprocess()` accepts either a `PreprocessorConfig` or `Preprocessor` object for
#' # this reason.
#' iris_test_pre <- preprocess(iris_test, iris_pre)
#'
#' # Access preprocessed test data
#' preprocessed(iris_test_pre)
preprocess <- new_generic("preprocess", c("x", "config"))


# %% train_ ----
#' Generic for training supervised learning models
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific training based on
#' `Hyperparameters` class. Called by `train()`.
#'
#' @param hyperparameters `Hyperparameters` object: Algorithm-specific hyperparameters.
#' @param x tabular data: Training set.
#' @param weights Optional Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Validation set for algorithms that support early stopping.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Algorithm-specific fitted model object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
train_ <- new_generic(
  "train_",
  "hyperparameters",
  function(
    hyperparameters,
    x,
    weights = NULL,
    dat_validation = NULL,
    execution_config = setup_ExecutionConfig(),
    verbosity = 1L
  ) {
    S7_dispatch()
  }
) # /rtemis::train_


# %% predict_super ----
#' Predict from supervised learning model (internal)
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific prediction based on
#' model class.
#'
#' @param model Fitted model object.
#' @param newdata tabular data: New data for prediction.
#' @param type Character: Type of supervised learning ("Classification" or "Regression").
#' @param ... Additional arguments (not currently used).
#'
#' @return Predictions (class probabilities for classification, numeric for regression).
#'
#' @author EDG
#' @keywords internal
#' @noRd
predict_super <- new_generic(
  "predict_super",
  "model",
  function(model, newdata, type = NULL, verbosity = 0L) {
    S7_dispatch()
  }
) # /rtemis::predict_super


# %% varimp_super ----
#' Get variable importance (internal)
#'
#' @description
#' Internal S7 generic that dispatches algorithm-specific variable importance
#' extraction based on model class.
#'
#' @param object Fitted model object.
#'
#' @return Numeric vector of variable importance scores (named by feature).
#'
#' @author EDG
#' @keywords internal
#' @noRd
varimp_super <- new_generic(
  "varimp_super",
  "model",
  function(model, ...) {
    S7_dispatch()
  }
) # /rtemis::varimp_super


# %% se_super ----
#' Get standard errors of predictions (internal)
#'
#' @description
#' Internal S7 generic for extracting standard errors from regression models.
#'
#' @param object Fitted model object.
#' @param newdata tabular data: New data for prediction.
#'
#' @return Numeric vector of standard errors.
#'
#' @author EDG
#' @keywords internal
#' @noRd
se_super <- new_generic(
  "se_super",
  "model",
  function(model, newdata) {
    S7_dispatch()
  }
)


# %% se ----
# Standard error of the fit.
se <- new_generic("se", "x")


# %% decomp_ ----
#' Generic for decomposition
#'
#' @author EDG
#' @keywords internal
#' @noRd
decomp_ <- new_generic(
  "decomp_",
  "config",
  function(config, x, verbosity = 1L) {
    S7_dispatch()
  }
) # /rtemis::decomp_


# %% cluster_ ----
#' Generic for clustering
#'
#' @author EDG
#' @keywords internal
#' @noRd
cluster_ <- new_generic(
  "cluster_",
  "config",
  function(config, x, verbosity = 1L) {
    S7_dispatch()
  }
) # /rtemis::cluster_


# %% desc ----
#' Short description for inline printing.
#' This is like `repr` for single-line descriptions.
#'
#' @author EDG
#' @keywords internal
#' @noRd
desc <- new_generic("desc", "x")


# %% get_metric ----
#' Get metric
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_metric <- new_generic("get_metric", "x")


# %% validate_hyperparameters ----
#' Check hyperparameters given training data
#'
#' @param x tabular data: Training data.
#' @param hyperparameters `Hyperparameters` to check.
#'
#' @author EDG
#' @keywords internal
#' @noRd
validate_hyperparameters <- new_generic(
  "validate_hyperparameters",
  "x",
  function(x, hyperparameters) {
    S7_dispatch()
  }
) # /rtemis::validate_hyperparameters


# %% plot_metric ----
#' Plot Metric
#'
#' @description
#' Plot metric for `SupervisedRes` objects.
#'
#' @param x `SupervisedRes` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return plotly object
#'
#' @author EDG
#' @keywords internal
#' @noRd
plot_metric <- new_generic("plot_metric", "x")


# %% plot_roc ----
#' Plot ROC curve
#'
#' @description
#' This generic is used to plot the ROC curve for a model.
#'
#' @param x `Classification` or `ClassificationRes` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return A plotly object containing the ROC curve.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ir <- iris[51:150, ]
#' ir[["Species"]] <- factor(ir[["Species"]])
#' species_glm <- train(ir, algorithm = "GLM")
#' plot_roc(species_glm)
plot_roc <- new_generic("plot_roc", "x")


# %% plot_varimp ----
#' Plot Variable Importance
#'
#' @description
#' Plot Variable Importance for Supervised objects.
#'
#' @param x `Supervised` or `SupervisedRes` object.
#' @param ... Additional arguments passed to methods.
#'
#' @details
#' This method calls [draw_varimp] internally.
#' If you pass an integer to the `plot_top` argument, the method will plot this many top features.
#' If you pass a number between 0 and 1 to the `plot_top` argument, the method will plot this
#' fraction of top features.
#'
#' @return plotly object or invisible NULL if no variable importance is available.
#'
#' @author EDG
#' @export
#'
#' @seealso [draw_varimp], which is called by this method
#'
#' @examplesIf interactive()
#' ir <- set_outcome(iris, "Sepal.Length")
#' seplen_cart <- train(ir, algorithm = "CART")
#' plot_varimp(seplen_cart)
#' # Plot horizontally
#' plot_varimp(seplen_cart, orientation = "h")
#' plot_varimp(seplen_cart, orientation = "h", plot_top = 3L)
#' plot_varimp(seplen_cart, orientation = "h", plot_top = 0.5)
plot_varimp <- new_generic("plot_varimp", "x")


# %% plot_true_pred ----
#' Plot True vs. Predicted Values
#'
#' @description
#' Plot True vs. Predicted Values for Supervised objects.
#' For classification, it plots a confusion matrix.
#' For regression, it plots a scatter plot of true vs. predicted values.
#'
#' @param x `Supervised` or `SupervisedRes` object.
#' @param ... Additional arguments passed to methods.
#'
#' @return plotly object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- set_outcome(iris, "Sepal.Length")
#' sepallength_glm <- train(x, algorithm = "GLM")
#' plot_true_pred(sepallength_glm)
plot_true_pred <- new_generic("plot_true_pred", "x")


# %% plot_manhattan ----
#' Manhattan plot
#'
#' @description
#' Draw a Manhattan plot for `MassGLM` objects created with [massGLM].
#'
#' @param x `MassGLM` object.
#' @param ... Additional arguments passed to methods.
#'
#' @return plotly object.
#'
#' @author EDG
#' @export
# example included in `plot_manhattan.MassGLM` method.
plot_manhattan <- new_generic("plot_manhattan", "x")


# %% describe ----
#' Describe object
#'
#' @param x R object to describe. See method documentation for supported classes.
#' @param ... Additional arguments passed to methods. See details.
#'
#' @details
#' Extra arguments for `factor` method:
#' - `max_n`: Integer: Return counts for up to this many levels.
#' - `return_ordered`: Logical: If TRUE, return levels ordered by count, otherwise return in level order.
#' - `verbosity`: Integer: Verbosity level.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # --- For `Supervised` objects ---
#' species_lightrf <- train(iris, algorithm = "lightrf")
#' describe(species_lightrf)
#'
#' # --- For `SupervisedRes` objects ---
#' mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler())
#' describe(mod)
#'
#' # --- For factors ---
#' # Small number of levels
#' describe(iris[["Species"]])
#'
#' # Large number of levels: show top n by count
#' x <- factor(sample(letters, 1000, TRUE))
#' describe(x)
#' describe(x, 3)
#' describe(x, 3, return_ordered = FALSE)
describe <- new_generic("describe", "x")


# %% present ----
#' Present rtemis object
#'
#' @description
#' This generic is used to present an rtemis object by printing to console and drawing plots.
#'
#' @param x `Supervised` or `SupervisedRes` object or list of such objects.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @return A plotly object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' ir <- set_outcome(iris, "Sepal.Length")
#' seplen_lightrf <- train(ir, algorithm = "lightrf")
#' present(seplen_lightrf)
present <- new_generic("present", "x")


# %% get_hyperparams_need_tuning ----
#' Get hyperparameters that need tuning.
#'
#' @return Character vector of hyperparameter names that need tuning.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_hyperparams_need_tuning <- new_generic("get_hyperparams_need_tuning", "x")


# %% get_hyperparams ----
#' Get hyperparameters.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_hyperparams <- new_generic("get_hyperparams", c("x", "param_names"))


# %% extract_rules ----
#' Extract rules from a model.
#'
#' @author EDG
#' @keywords internal
#' @noRd
extract_rules <- new_generic("extract_rules", "x")


# %% get_factor_levels ----
#' @name get_factor_levels
#'
#' @title
#' Get factor levels from data.frame or similar
#'
#' @usage
#' get_factor_levels(x)
#'
#' @param x tabular data.
#'
#' @return Named list of factor levels. Names correspond to column names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_factor_levels <- new_generic(
  "get_factor_levels",
  "x",
  function(x) S7_dispatch()
)

method(get_factor_levels, class_data.frame) <- function(x) {
  factor_index <- which(sapply(x, is.factor))
  lapply(x[, factor_index, drop = FALSE], levels)
}

method(get_factor_levels, class_data.table) <- function(x) {
  factor_index <- which(sapply(x, is.factor))
  lapply(x[, factor_index, with = FALSE], levels)
}


# %% to_html ----
#' Convert to HTML
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_html <- new_generic("to_html", "x")


# %% to_toml ----
#' Convert to TOML
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_toml <- new_generic("to_toml", "x")


# %% to_yaml ----
#' Convert to YAML
#'
#' @author EDG
#' @keywords internal
#' @noRd
to_yaml <- new_generic("to_yaml", "x")


# %% to_json ----
#' Convert to JSON-serializable list
#'
#' Convert an rtemis S7 object to a named list suitable for
#' `jsonlite::toJSON(auto_unbox = TRUE)`. Used by the rtemislive backend
#' to send structured results to the browser frontend without scraping
#' R console output.
#'
#' Each output list includes a `.class` field equal to the most specific
#' S7 class name, allowing the frontend to dispatch to a class-specific
#' renderer.
#'
#' The default method walks `props(x)`, recursing into S7-typed properties
#' and passing through primitive properties as-is. Per-class methods
#' override where the default isn't appropriate (e.g. classes whose props
#' include a `data.table`, an opaque model fit, or where some props should
#' be excluded for size or relevance reasons).
#'
#' @param x rtemis S7 object.
#' @param ... Additional arguments passed to method.
#'
#' @return Named list. Pass through `jsonlite::toJSON(auto_unbox = TRUE)`
#' for serialization.
#'
#' @author EDG
#' @keywords internal
#' @export
to_json <- new_generic("to_json", "x")


# %% to_json default ----
#' @name to_json
#' @keywords internal
#' @noRd
method(to_json, S7_object) <- function(x, ...) {
  ps <- props(x)
  body <- lapply(ps, .to_json_value)
  c(list(.class = S7_class(x)@name), body)
} # /rtemis::to_json.S7_object


#' Recursively convert a value to a JSON-serializable form
#'
#' Handles the common composite shapes encountered when walking S7 props:
#' nested S7 objects (recurse via the generic), lists that may *contain*
#' S7 objects (recurse element-wise), and primitives / data.frames
#' (pass through — jsonlite supports them natively).
#'
#' @param v Value from an S7 property.
#'
#' @return JSON-serializable value.
#'
#' @author EDG
#' @keywords internal
#' @noRd
.to_json_value <- function(v) {
  if (is.null(v)) {
    return(NULL)
  }
  if (S7_inherits(v)) {
    return(to_json(v))
  }
  # data.frame / data.table are list-like but jsonlite handles them natively.
  if (is.list(v) && !is.data.frame(v)) {
    return(lapply(v, .to_json_value))
  }
  v
} # /rtemis::.to_json_value


# %% write_toml ----
#' @name
#' write_toml
#'
#' @title
#' Write to TOML file
#'
#' @author EDG
#' @export
# examples include in method documentation
write_toml <- new_generic(
  "write_toml",
  "x",
  function(x, file, overwrite = FALSE, verbosity = 1L) {
    S7_dispatch()
  }
) # /rtemis::write_toml


# %% inc ----
#' Select (include) columns by character or numeric vector.
#'
#' @param x tabular data.
#' @param idx Character or numeric vector: Column names or indices to include.
#'
#' @return data.frame, tibble, or data.table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' inc(iris, c(3, 4)) |> head()
#' inc(iris, c("Sepal.Length", "Species")) |> head()
inc <- new_generic("inc", "x", function(x, idx) {
  S7_dispatch()
})


# %% exc ----
#' Exclude columns by character or numeric vector.
#'
#' @param x tabular data.
#' @param idx Character or numeric vector: Column names or indices to exclude.
#'
#' @return data.frame, tibble, or data.table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' exc(iris, "Species") |> head()
#' exc(iris, c(1, 3)) |> head()
exc <- new_generic("exc", c("x", "idx"), function(x, idx) {
  S7_dispatch()
})

method(inc, class_data.frame) <- function(x, idx) {
  x[, idx, drop = FALSE]
}

method(inc, class_data.table) <- function(x, idx) {
  x[, .SD, .SDcols = idx]
}

method(exc, list(class_data.frame, class_character)) <- function(x, idx) {
  x[, -which(names(x) %in% idx), drop = FALSE]
}

method(exc, list(class_data.frame, class_integer)) <- function(x, idx) {
  x[, -idx, drop = FALSE]
}

method(exc, list(class_data.frame, class_double)) <- function(x, idx) {
  idx <- clean_int(idx)
  x[, -idx, drop = FALSE]
}

method(
  exc,
  list(class_data.table, class_character | class_integer)
) <- function(x, idx) {
  x[, .SD, .SDcols = -idx]
}

method(exc, list(class_data.table, class_double)) <- function(x, idx) {
  idx <- clean_int(idx)
  x[, .SD, .SDcols = -idx]
}


# %% outcome_name ----
#' Get the name of the last column
#'
#' @details
#' This applied to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Name of the last column.
#'
#' @author EDG
#' @export
#'
#' @examples
#' outcome_name(iris)
outcome_name <- new_generic("outcome_name", "x", function(x) {
  S7_dispatch()
})

method(outcome_name, class_data.frame) <- function(x) {
  names(x)[NCOL(x)]
} # /rtemis::outcome_name


# %% outcome ----
#' Get the outcome as a vector
#'
#' Returns the last column of `x`, which is by convention the outcome variable.
#'
#' @details
#' This applied to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Vector containing the last column of `x`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' outcome(iris)
outcome <- new_generic("outcome", "x", function(x) {
  S7_dispatch()
}) # /rtemis::outcome

method(outcome, class_data.frame) <- function(x) {
  x[[NCOL(x)]]
}


# %% features ----
#' Get features from tabular data
#'
#' Returns all columns except the last one.
#'
#' @details
#' This can be applied to tabular datasets used for supervised learning in \pkg{rtemis},
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data: Input data to get features from.
#'
#' @return Object of the same class as the input, after removing the last column.
#'
#' @author EDG
#' @export
#'
#' @examples
#' features(iris) |> head()
features <- new_generic("features", "x", function(x) {
  S7_dispatch()
}) # /rtemis::features

method(features, class_data.frame) <- function(x) {
  if (NCOL(x) < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  x[, -NCOL(x), drop = FALSE]
}

method(features, class_data.table) <- function(x) {
  if (NCOL(x) < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  x[, -NCOL(x), with = FALSE]
} # /rtemis::features.class_data.table


# %% feature_names ----
#' Get feature names
#'
#' Returns all column names except the last one
#'
#' @details
#' This applied to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Character vector of feature names.
#'
#' @author EDG
#' @export
#'
#' @examples
#' feature_names(iris)
feature_names <- new_generic("feature_names", "x", function(x) {
  S7_dispatch()
}) # /rtemis::feature_names

method(feature_names, class_data.frame) <- function(x) {
  if (NCOL(x) < 2) {
    cli::cli_abort("Input must have at least 2 columns.")
  }
  names(x)[-NCOL(x)]
} # /rtemis::feature_names.class_data.frame


# %% check_factor_levels ----
#' Check factor levels
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_factor_levels <- new_generic("check_factor_levels", c("x"))


# %% get_factor_names ----
#' Get factor names
#'
#' @details
#' This applied to tabular datasets used for supervised learning in rtemis,
#' where, by convention, the last column is the outcome variable and all other columns
#' are features.
#'
#' @param x tabular data.
#'
#' @return Character vector of factor names.
#'
#' @author EDG
#' @export
#'
#' @examples
#' get_factor_names(iris)
get_factor_names <- new_generic("get_factor_names", "x", function(x) {
  S7_dispatch()
}) # /rtemis::get_factor_names

method(get_factor_names, class_data.frame) <- function(x) {
  names(x)[sapply(x, is.factor)]
}


# %% calibrate ----
#' Calibrate `Classification` & `ClassificationRes` Models
#'
#' @description
#' Generic function to calibrate binary classification models.
#'
#' @param x `Classification` or `ClassificationRes` object to calibrate.
#' @param algorithm Character: Algorithm to use to train calibration model.
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param verbosity Integer: Verbosity level.
#' @param ... Additional arguments passed to specific methods.
#'
#' @section Method-specific parameters:
#'
#' **For `Classification` objects:**
#' * `predicted_probabilities`: Numeric vector of predicted probabilities
#' * `true_labels`: Factor of true class labels
#'
#' **For `ClassificationRes` objects:**
#' * `resampler_config`: `ResamplerConfig` object for calibration training
#' * `train_verbosity`: Integer controlling calibration model training output
#'
#' @details
#' The goal of calibration is to adjust the predicted probabilities of a binary classification
#' model so that they better reflect the true probabilities (i.e. empirical risk) of the positive
#' class.
#'
#' @return Calibrated model object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # --- Calibrate Classification ---
#' dat <- iris[51:150, ]
#' res <- resample(dat)
#' dat$Species <- factor(dat$Species)
#' dat_train <- dat[res[[1]], ]
#' dat_test <- dat[-res[[1]], ]
#'
#' # Train GLM on a training/test split
#' mod_c_glm <- train(
#'   x = dat_train,
#'   dat_test = dat_test,
#'   algorithm = "glm"
#' )
#'
#' # Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`,
#' # in this case using the training data, but it could be a separate calibration dataset.
#' mod_c_glm_cal <- calibrate(
#'   mod_c_glm,
#'   predicted_probabilities = mod_c_glm$predicted_prob_training,
#'   true_labels = mod_c_glm$y_training
#' )
#' mod_c_glm_cal
#'
#' # --- Calibrate ClassificationRes ---
#'
#' # Train GLM with cross-validation
#' resmod_c_glm <- train(
#'   x = dat,
#'   algorithm = "glm",
#'   outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
#' )
#'
#' # Calibrate the `ClassificationRes` using the same resampling configuration as used for training.
#' resmod_c_glm_cal <- calibrate(resmod_c_glm)
#' resmod_c_glm_cal
calibrate <- new_generic(
  "calibrate",
  ("x"),
  function(
    x,
    algorithm = "isotonic",
    hyperparameters = NULL,
    verbosity = 1L,
    ...
  ) {
    S7_dispatch()
  }
) # /rtemis::calibrate


# %% freeze ----
#' Freeze Hyperparameters
#'
#' @param x `Hyperparameters` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
freeze <- new_generic("freeze", "x")


# %% lock ----
#' Lock Hyperparameters
#'
#' @param x `Hyperparameters` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
lock <- new_generic("lock", "x")


# %% needs_tuning ----
#' needs_tuning
#'
#' @keywords internal
#' @noRd
needs_tuning <- new_generic("needs_tuning", "x")


# %% get_factor_levels ----
#' @name get_factor_levels
#'
#' @title
#' Get factor levels from data.frame or similar
#'
#' @usage
#' get_factor_levels(x)
#'
#' @param x tabular data.
#'
#' @return Named list of factor levels. Names correspond to column names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_factor_levels <- new_generic(
  "get_factor_levels",
  "x",
  function(x) S7_dispatch()
)

method(get_factor_levels, class_data.frame) <- function(x) {
  factor_index <- which(sapply(x, is.factor))
  lapply(x[, factor_index, drop = FALSE], levels)
}

method(get_factor_levels, class_data.table) <- function(x) {
  factor_index <- which(sapply(x, is.factor))
  # with = FALSE slightly more performance than using .SD
  lapply(x[, factor_index, with = FALSE], levels)
}


# %% is_tuned ----
is_tuned <- new_generic("is_tuned", "x")


# %% get_tuned_status ----
get_tuned_status <- new_generic("get_tuned_status", "x")


# %% one_hot ----
one_hot <- new_generic("one_hot", "x")


# --- Custom S7 validators -------------------------------------------------------------------------
# %% scalar_dbl ----
#' Scalar double
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    if (!is.null(value)) {
      if (length(value) != 1) {
        "must be a scalar double."
      } else if (!is.double(value)) {
        "must be double."
      }
    }
  }
) # /rtemis::scalar_dbl


# %% scalar_dbl_01excl ----
#' Scalar double between 0 and 1, exclusive
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl_01excl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    if (!is.null(value)) {
      if (length(value) != 1) {
        "must be a scalar double."
      } else if (value <= 0 || value >= 1) {
        "must be between > 0 and < 1."
      }
    }
  }
) # /rtemis::scalar_dbl_01excl


# %% scalar_dbl_01incl ----
#' Scalar double between 0 and 1, inclusive
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_dbl_01incl <- S7::new_property(
  class = S7::class_double | NULL,
  validator = function(value) {
    if (!is.null(value)) {
      if (length(value) != 1) {
        "must be a scalar double."
      } else if (value < 0 || value > 1) {
        "must be between >= 0 and <= 1."
      }
    }
  }
) # /rtemis::scalar_dbl_01incl


# %% scalar_int ----
#' Scalar integer
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_int <- S7::new_property(
  class = S7::class_integer | NULL,
  validator = function(value) {
    if (!is.null(value)) {
      if (length(value) != 1) {
        "must be a scalar integer."
      }
    }
  }
) # /rtemis::scalar_int


# %% scalar_int_pos ----
#' Scalar positive integer
#'
#' @author EDG
#' @keywords internal
#' @noRd
scalar_int_pos <- S7::new_property(
  class = S7::class_integer | NULL,
  validator = function(value) {
    if (!is.null(value)) {
      if (length(value) != 1) {
        "must be a positive integer scalar."
      } else if (value < 0) {
        "must be >= 0."
      }
    }
  }
) # /rtemis::scalar_int_pos


# %% preprocessed ----
#' Get preprocessed data from `Preprocessor`.
#'
#' Returns the preprocessed data from a `Preprocessor` object.
#'
#' @param x `Preprocessor`: A `Preprocessor` object.
#'
#' @return data.frame: The preprocessed data.
#'
#' @export
#'
#' @examples
#' prp <- preprocess(iris, setup_Preprocessor(scale = TRUE, center = TRUE))
#' preprocessed(prp)
preprocessed <- new_generic("preprocessed", "x", function(x) {
  S7_dispatch()
}) # /rtemis::preprocessed


# --- Internal functions ---------------------------------------------------------------------------
#' Get output type
#'
#' Get output type for printing text.
#'
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#' @param filename Character: Filename for output.
#'
#' @return Character with selected output type.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
get_output_type <- function(
  output_type = c("ansi", "html", "plain"),
  filename = NULL
) {
  if (!is.null(filename)) {
    return("plain")
  }

  if (is.null(output_type)) {
    if (interactive()) {
      return("ansi")
    } else {
      return("plain")
    }
  }

  match.arg(output_type)
} # /rtemis::get_output_type


# %% S7_to_list ----
S7_to_list <- function(x) {
  if (S7_inherits(x)) {
    x <- props(x)
  }
  if (is.list(x)) {
    x <- lapply(x, S7_to_list)
  }
  x
} # /rtemis::S7_to_list


# %% toml_empty_to_null ----
toml_empty_to_null <- function(x) {
  if (!is.list(x)) {
    return(x)
  }
  if (length(x) == 0L) {
    return(NULL)
  }
  if (is.null(names(x))) {
    scalar_types <- vapply(
      x,
      function(el) {
        is.atomic(el) && length(el) == 1L && !is.null(el)
      },
      logical(1)
    )
    if (all(scalar_types)) {
      return(unlist(x, use.names = FALSE))
    }
  }
  lapply(x, toml_empty_to_null)
} # /rtemis::toml_empty_to_null


# %% write_lines ----
#' Write lines to file
#'
#' Normalizes path, check if directory exists, creates it if necessary,
#' writes lines to file, and checks if file was created successfully.
#'
#' @param x Character: Text to write to file.
#' @param file Character: Path to output file.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Invisible NULL. Called for side effect of writing to file.
#'
#' @author EDG
#' @keywords internal
#' @noRd
write_lines <- function(x, file, overwrite = FALSE, verbosity = 1L) {
  # Normalize path
  file <- normalizePath(file, mustWork = FALSE)
  # Check if file exists
  if (file.exists(file)) {
    if (overwrite) {
      if (verbosity >= 1L) {
        msg(fmt(
          paste("Overwriting existing file:", file),
          col = rtemis_colors[["orange"]]
        ))
      }
    } else {
      cli::cli_abort(
        "File already exists: {file}. Set `overwrite = TRUE` to overwrite."
      )
    }
  }
  # Get directory name
  dir <- dirname(file)
  # Check if directory exists, create it if not
  if (!dir.exists(dir)) {
    dir.create(dir, recursive = TRUE)
    if (!dir.exists(dir)) {
      cli::cli_abort("Failed to create directory: {dir}")
    } else {
      if (verbosity >= 1L) {
        msg(checkmark(), "Created directory:", dir)
      }
    }
  }
  # Write lines to file
  writeLines(x, con = file)
  # Check if file was created successfully
  if (!file.exists(file)) {
    cli::cli_abort("Failed to create file: {file}")
  } else {
    if (verbosity >= 1L) {
      msg(checkmark(), "Created file:", file)
    }
  }
  invisible(NULL)
} # /rtemis::write_lines


# %% toml_meta ----
#' @name
#' toml_meta
#'
#' @title
#' Write TOML metadata
#'
#' @description
#' Creates named list which will become first TOML table in the following format:
#'
#' ```toml
#' [_meta]
#' package = "rtemis"
#' package_version = "0.4.2"
#' schema_version = "1.0"
#' object_type = "SuperConfig"
#' created_at = 2026-2-11T22:45:00Z
#' ```
#' @param x Object to create metadata for. Class name will be included in metadata.
#' @param schema_version Character: Version of the schema to include in metadata.
#'
#' @return Named list containing metadata.
#'
#' @author EDG
#' @keywords internal
#' @noRd
toml_meta <- function(x, schema_version = "1.0") {
  list(
    `_meta` = list(
      package = "rtemis",
      package_version = as.character(packageVersion("rtemis")),
      schema_version = schema_version,
      object_type = S7_class(x)@name,
      created_at = format(
        Sys.time(),
        "%Y-%m-%dT%H:%M:%SZ",
        tz = "UTC"
      )
    )
  )
} # /rtemis::toml_meta


# %% toml_with_meta ----
#' Create TOML string with metadata
#'
#' Creates a TOML string with an inline metadata table followed by the TOML representation of the
#' object.
#'
#' @param x Object to convert to TOML. Class name will be included in metadata.
#'
#'
#' @return Character string containing TOML representation of the object, with metadata included as
#' an inline table at the top.
#'
#' @author EDG
#' @keywords internal
#' @noRd
toml_with_meta <- function(x, payload, schema_version = "1.0") {
  meta_block <- toml::write_toml(
    toml_meta(x, schema_version = schema_version)
  )
  meta_lines <- strsplit(meta_block, "\n", fixed = TRUE)[[1]]
  meta_lines <- meta_lines[meta_lines != "" & meta_lines != "[_meta]"]
  meta_inline <- paste0(
    "_meta = { ",
    paste(meta_lines, collapse = ", "),
    " }"
  )
  payload_str <- toml::write_toml(payload)
  paste(meta_inline, payload_str, sep = "\n\n")
} # /rtemis::toml_with_meta


================================================
FILE: R/01_ExecutionConfig.R
================================================
# ExecutionConfig.R
# ::rtemis::
# 2026- EDG rtemis.org

# %% ExecutionConfig ----
#' ExecutionConfig Class
#'
#' @description
#' Execution Configuration Class, defining sequential/parallel/distributed execution settings.
#'
#' @author EDG
#' @noRd
ExecutionConfig <- new_class(
  name = "ExecutionConfig",
  properties = list(
    backend = class_character,
    n_workers = class_integer,
    future_plan = class_character | NULL
  ),
  constructor = function(backend, n_workers, future_plan) {
    n_workers <- clean_int(n_workers)
    check_character(backend, allow_null = FALSE)
    check_character(future_plan, allow_null = TRUE)
    new_object(
      S7::S7_object(),
      backend = backend,
      n_workers = n_workers,
      future_plan = future_plan
    )
  },
  validator = function(self) {
    if (self@backend == "future" && is.null(self@future_plan)) {
      "@future_plan must be set when backend is 'future'."
    } else if (self@backend == "none" && self@n_workers != 1L) {
      "n_workers must be 1 when backend is 'none'."
    } else if (self@backend == "mirai" && self@n_workers < 1L) {
      "n_workers must be at least 1 when backend is 'mirai'."
    } else if (self@backend == "future" && self@n_workers < 1L) {
      "n_workers must be at least 1 when backend is 'future'."
    }
  }
) # /rtemis::ExecutionConfig


# %% repr.ExecutionConfig ----
method(repr, ExecutionConfig) <- function(x, pad = 0L, output_type = NULL) {
  out <- repr_S7name("ExecutionConfig", pad = pad, output_type = output_type)
  .props <- props(x)
  if (.props[["backend"]] != "future") {
    .props[["future_plan"]] <- NULL
  }
  out <- paste0(
    out,
    repr_ls(.props, pad = pad, output_type = output_type)
  )
} # /rtemis::repr.ExecutionConfig


# %% print.ExecutionConfig ----
method(print, ExecutionConfig) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type), "\n")
  invisible(x)
} # /rtemis::print.ExecutionConfig


# %% --- User API ----

# %% setup_ExecutionConfig ----
#' Setup Execution Configuration
#'
#' @param backend Character: Execution backend: "future", "mirai", or "none".
#' @param n_workers Integer: Number of workers for parallel execution. Only used if `backend is
#'  "future"` or "mirai". Do not rely on the default value, set to an appropriate number depending
#' on your system.
#' @param future_plan Character: Future plan to use if `backend` is "future".
#'
#' @return `ExecutionConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' setup_ExecutionConfig(backend = "future", n_workers = 4L, future_plan = "multisession")
setup_ExecutionConfig <- function(
  backend = c("future", "mirai", "none"),
  n_workers = NULL,
  future_plan = NULL
) {
  backend <- match.arg(backend)
  if (backend == "future") {
    check_dependencies("futurize")
    check_character(future_plan, allow_null = TRUE)
    if (is.null(future_plan)) {
      future_plan <- getOption("future.plan", "mirai_multisession")
    }
    if (!future_plan %in% ALLOWED_PLANS) {
      cli::cli_abort(
        "{.val {future_plan}} is not an allowed future plan. Allowed plans: {.val {ALLOWED_PLANS}}."
      )
    }
    if (is.null(n_workers)) {
      n_workers <- parallelly::availableCores(omit = 3L)
    }
  } else if (backend == "mirai") {
    check_dependencies("mirai")
    if (is.null(n_workers)) {
      n_workers <- parallelly::availableCores(omit = 3L)
    }
  } else if (backend == "none") {
    if (is.null(n_workers)) {
      n_workers <- 1L
    } else if (n_workers != 1L) {
      cli::cli_abort("n_workers must be 1 when backend is 'none'.")
    }
  }
  n_workers <- clean_int(n_workers)
  if (n_workers < 1L) {
    cli::cli_abort("n_workers must be at least 1.")
  }
  ExecutionConfig(
    backend = backend,
    n_workers = n_workers,
    future_plan = if (backend == "future") future_plan else NULL
  )
} # /rtemis::setup_ExecutionConfig


================================================
FILE: R/02_Hyperparameters.R
================================================
# S7_Hyperparameters.R
# ::rtemis::
# 2025- EDG rtemis.org

# References ----
# S7
# - https://github.com/RConsortium/S7
# - https://rconsortium.github.io/S7/
# LightGBM parameters
# - https://lightgbm.readthedocs.io/en/latest/Parameters.html

# %% Constants ----
# `tuned` values ----
# -9: Set by Tuner: Actively being tuned (Values fixed by Tuner).
# -2: Set by constructor: Not tunable (No tunable_hyperparameters).
# -1: Set by constructor: Not tunable (tunable_hyperparameters exist, but none of them have more than one value).
#  0: Set by constructor: Untuned but tunable (at least one of tunable_hyperparameters has more than one value).
#  1: Set by Tuner: Tuned (Started as 0, set to 1 when tuned).
TUNED_STATUS_TUNING <- -9L
TUNED_STATUS_NOT_TUNABLE <- -2L
TUNED_STATUS_NO_SEARCH_VALUES <- -1L
TUNED_STATUS_UNTUNED <- 0L
TUNED_STATUS_TUNED <- 1L

# `resampled` values ----
# 0: Running on single training set.
# 1: Running on resampled training sets.

# %% Hyperparameters ----
#' @title Hyperparameters
#'
#' @description
#' Superclass for hyperparameters.
#'
#' @field algorithm Character: Algorithm name.
#' @field hyperparameters Named list of algorithm hyperparameter values.
#' @field tunable_hyperparameters Character: Names of tunable hyperparameters.
#' @field fixed_hyperparameters Character: Names of fixed hyperparameters.
#' @field tuned Integer: Tuning status.
#' @field resampled Integer: Outer resampling status.
#' @field n_workers Integer: Number of workers to use for tuning.
#'
#' @author EDG
#' @noRd
Hyperparameters <- new_class(
  name = "Hyperparameters",
  properties = list(
    algorithm = class_character,
    hyperparameters = class_list,
    tunable_hyperparameters = class_character,
    fixed_hyperparameters = class_character,
    tuned = class_integer,
    resampled = class_integer,
    n_workers = class_integer
  ),
  constructor = function(
    algorithm,
    hyperparameters,
    tunable_hyperparameters,
    fixed_hyperparameters,
    n_workers = 1L
  ) {
    # Test if any tunable_hyperparameters have more than one value
    if (length(tunable_hyperparameters) > 0) {
      if (any(sapply(hyperparameters[tunable_hyperparameters], length) > 1)) {
        tuned <- 0L # Search values defined for tunable hyperparameters.
      } else {
        tuned <- -1L # No search values defined for tunable hyperparameters.
      }
    } else {
      tuned <- -2L # No tunable hyperparameters
    }
    # GLMNET
    if (algorithm == "GLMNET") {
      if (is.null(hyperparameters[["lambda"]])) {
        tuned <- 0L
      }
    }
    # LightGBM
    if (algorithm == "LightGBM") {
      if (is.null(hyperparameters[["nrounds"]])) {
        tuned <- 0L
      }
    }
    # SVM
    # Check kernel-specific hyperparameters
    if (algorithm == "SVM") {
      # linear => cost
      if (hyperparameters[["kernel"]] == "linear") {
        if (length(hyperparameters[["cost"]]) > 1) {
          tuned <- 0L
        }
      } else if (hyperparameters[["kernel"]] == "polynomial") {
        if (length(hyperparameters[["degree"]]) > 1) {
          tuned <- 0L
        }
      } else if (hyperparameters[["kernel"]] == "radial") {
        if (length(hyperparameters[["sigma"]]) > 1) {
          tuned <- 0L
        }
      }
    }
    n_workers <- clean_posint(n_workers)
    new_object(
      S7_object(),
      algorithm = algorithm,
      hyperparameters = hyperparameters,
      tunable_hyperparameters = tunable_hyperparameters,
      fixed_hyperparameters = fixed_hyperparameters,
      tuned = tuned,
      resampled = 0L,
      n_workers = n_workers
    )
  }
) # /rtemis::Hyperparameters


# %% repr.Hyperparameters ----
#' Repr Hyperparameters
#'
#' repr method for Hyperparameters object.
#'
#' @param x `Hyperparameters` object.
#' @param pad Integer: Left padding for printed output.
#' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis. `-1` means no limit.
#' @param limit Integer: Limit number of items to show. `-1` means no limit.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @author EDG
#' @noRd
method(repr, Hyperparameters) <- function(
  x,
  pad = 0L,
  maxlength = -1L,
  limit = -1L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  out <- repr_S7name(
    paste0(x@algorithm, "Hyperparameters"),
    pad = pad,
    output_type = output_type
  )
  out <- paste0(
    out,
    repr_ls(
      props(x)[-1],
      pad = pad,
      maxlength = maxlength,
      limit = limit,
      output_type = output_type
    )
  )
  if (x@tuned == TUNED_STATUS_TUNING) {
    out <- paste0(
      out,
      fmt(
        "\n  Hyperparameters are being tuned.\n",
        col = col_tuner,
        bold = TRUE,
        output_type = output_type
      )
    )
  } else if (x@tuned == TUNED_STATUS_NOT_TUNABLE) {
    out <- paste0(
      out,
      fmt(
        "\n  No hyperparameters are tunable.\n",
        col = col_tuner,
        bold = TRUE,
        output_type = output_type
      )
    )
  } else if (x@tuned == TUNED_STATUS_UNTUNED) {
    need_tuning <- names(get_hyperparams_need_tuning(x))
    out <- paste0(
      out,
      fmt(
        paste0(
          "\n  ",
          ngettext(length(need_tuning), "Hyperparameter ", "Hyperparameters "),
          oxfordcomma(
            need_tuning
          ),
          ngettext(length(need_tuning), " needs ", " need "),
          "tuning.\n"
        ),
        col = col_tuner,
        bold = TRUE,
        output_type = output_type
      )
    )
  } else if (x@tuned == TUNED_STATUS_NO_SEARCH_VALUES) {
    out <- paste0(
      out,
      fmt(
        "\n  No search values defined for tunable hyperparameters.\n",
        col = col_tuner,
        bold = TRUE,
        output_type = output_type
      )
    )
  } else if (x@tuned == TUNED_STATUS_TUNED) {
    out <- paste0(
      out,
      fmt(
        "\n  Hyperparameters are tuned.\n",
        col = col_tuner,
        bold = TRUE,
        output_type = output_type
      )
    )
  }
  out
} # /rtemis::repr.Hyperparameters


# %% print.Hyperparameters ----
method(print, Hyperparameters) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.Hyperparameters


# %% is_tuned.Hyperparameters ----
method(is_tuned, Hyperparameters) <- function(x) {
  x@tuned == 1L
} # /is_tuned.Hyperparameters


# %% get_tuned_status.Hyperparameters ----
method(get_tuned_status, Hyperparameters) <- function(x) {
  if (length(x@tunable_hyperparameters) > 0) {
    if (any(sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1)) {
      0L
    } else {
      -1L
    }
  } else {
    -2L
  }
} # /rtemis::get_tuned_status.Hyperparameters


# %% update.Hyperparameters ----
#' Update Hyperparameters
#'
#' @param x `Hyperparameters` object.
#' @param hyperparameters Named list of algorithm hyperparameter values.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(update, Hyperparameters) <- function(
  object,
  hyperparameters,
  tuned = NULL,
  ...
) {
  for (hp in names(hyperparameters)) {
    object@hyperparameters[[hp]] <- hyperparameters[[hp]]
  }
  # Update tuned status
  if (is.null(tuned)) {
    object@tuned <- get_tuned_status(object)
  } else {
    object@tuned <- tuned
  }
  object
} # /rtemis::update.Hyperparameters


# %% freeze.Hyperparameters ----
method(freeze, Hyperparameters) <- function(x) {
  x@tuned <- -1L
} # /rtemis::freeze.Hyperparameters


# %% lock.Hyperparameters ----
method(lock, Hyperparameters) <- function(x) {
  x@tuned <- 1L
}


# %% `$`.Hyperparameters ----
# Make Hyperparameters@hyperparameters@name `$`-accessible
method(`$`, Hyperparameters) <- function(x, name) {
  x@hyperparameters[[name]]
}


# %% `.DollarNames`.Hyperparameters ----
# `$`-autocomplete Hyperparameters@hyperparameters
method(`.DollarNames`, Hyperparameters) <- function(x, pattern = "") {
  all_names <- names(x@hyperparameters)
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.Hyperparameters ----
# Make Hyperparameters@hyperparameters@name `[[`-accessible
method(`[[`, Hyperparameters) <- function(x, name) {
  x@hyperparameters[[name]]
}


# %% needs_tuning.Hyperparameters ----
method(needs_tuning, Hyperparameters) <- function(x) {
  x@tuned == 0
} # /rtemis::needs_tuning.Hyperparameters


# %% get_hyperparams_need_tuning.Hyperparameters ----
#' Get hyperparameters that need tuning in an algorithm-specific way.
#'
#' @keywords internal
#' @noRd
method(get_hyperparams_need_tuning, Hyperparameters) <- function(x) {
  # -> list
  # Get tunable hyperparameters with more than one value
  x@hyperparameters[x@tunable_hyperparameters[
    sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1
  ]]
} # /get_hyperparams_need_tuning.Hyperparameters


# %% get_hyperparams.(Hyperparameters, class_character) ----
method(get_hyperparams, list(Hyperparameters, class_character)) <- function(
  x,
  param_names
) {
  sapply(param_names, function(p) x@hyperparameters[p], USE.NAMES = FALSE)
} # /rtemis::get_hyperparams_need_tuning.Hyperparameters


# %% GLMHyperparameters ----
#' @author EDG
#'
#' @keywords internal
#' @noRd
GLMHyperparameters <- new_class(
  name = "GLMHyperparameters",
  parent = Hyperparameters,
  constructor = function(ifw) {
    new_object(
      Hyperparameters(
        algorithm = "GLM",
        hyperparameters = list(
          ifw = ifw
        ),
        tunable_hyperparameters = "ifw",
        fixed_hyperparameters = character()
      )
    )
  } # /constructor
) # /rtemis::GLMHyperparameters


# %% setup_GLM ----
#' Setup GLM Hyperparameters
#'
#' Setup hyperparameters for GLM training.
#'
#' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GLMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' glm_hyperparams <- setup_GLM(ifw = TRUE)
#' glm_hyperparams
setup_GLM <- function(ifw = FALSE) {
  GLMHyperparameters(ifw = ifw)
}


# %% GAMHyperparameters ----
GAM_tunable <- c("k", "ifw")
GAM_fixed <- character()

#' @author EDG
#' @keywords internal
#' @noRd
GAMHyperparameters <- new_class(
  name = "GAMHyperparameters",
  parent = Hyperparameters,
  constructor = function(k, ifw) {
    new_object(
      Hyperparameters(
        algorithm = "GAM",
        hyperparameters = list(
          k = k,
          ifw = ifw
        ),
        tunable_hyperparameters = GAM_tunable,
        fixed_hyperparameters = GAM_fixed
      )
    )
  } # /constructor
) # /rtemis::GAMHyperparameters


# %% setup_GAM ----
#' Setup GAM Hyperparameters
#'
#' Setup hyperparameters for GAM training.
#'
#' Get more information from [mgcv::gam].
#'
#' @param k (Tunable) Integer: Number of knots.
#' @param ifw (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GAMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' gam_hyperparams <- setup_GAM(k = 5L, ifw = FALSE)
#' gam_hyperparams
setup_GAM <- function(k = 5L, ifw = FALSE) {
  k <- clean_posint(k)
  GAMHyperparameters(k = k, ifw = ifw)
}


# %% CARTHyperparameters ----
CART_tunable <- c("cp", "maxdepth", "minsplit", "minbucket", "prune_cp", "ifw")
CART_fixed <- c(
  "method",
  "model",
  "maxcompete",
  "maxsurrogate",
  "usesurrogate",
  "surrogatestyle",
  "xval",
  "cost"
)


#' @title CARTHyperparameters
#'
#' @description
#' Hyperparameters subclass for CART.
#'
#' @author EDG
#' @keywords internal
#' @noRd
CARTHyperparameters <- new_class(
  name = "CARTHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    cp,
    maxdepth,
    minsplit,
    minbucket,
    prune_cp,
    method,
    model,
    maxcompete,
    maxsurrogate,
    usesurrogate,
    surrogatestyle,
    xval,
    cost,
    ifw
  ) {
    new_object(
      Hyperparameters(
        algorithm = "CART",
        hyperparameters = list(
          cp = cp,
          maxdepth = maxdepth,
          minsplit = minsplit,
          minbucket = minbucket,
          prune_cp = prune_cp,
          method = method,
          model = model,
          maxcompete = maxcompete,
          maxsurrogate = maxsurrogate,
          usesurrogate = usesurrogate,
          surrogatestyle = surrogatestyle,
          xval = xval,
          cost = cost,
          ifw = ifw
        ),
        tunable_hyperparameters = CART_tunable,
        fixed_hyperparameters = CART_fixed
      )
    )
  } # /constructor
) # /rtemis::CARTHyperparameters


# %% setup_CART ----
#' Setup CART Hyperparameters
#'
#' Setup hyperparameters for CART training.
#'
#' Get more information from [rpart::rpart] and [rpart::rpart.control].
#'
#' @param cp (Tunable) Numeric: Complexity parameter.
#' @param maxdepth (Tunable) Integer: Maximum depth of tree.
#' @param minsplit (Tunable) Integer: Minimum number of observations in a node to split.
#' @param minbucket (Tunable) Integer: Minimum number of observations in a terminal node.
#' @param prune_cp (Tunable) Numeric: Complexity for cost-complexity pruning after tree is built
#' @param method String: Splitting method.
#' @param model Logical: If TRUE, return a model.
#' @param maxcompete Integer: Maximum number of competitive splits.
#' @param maxsurrogate Integer: Maximum number of surrogate splits.
#' @param usesurrogate Integer: Number of surrogate splits to use.
#' @param surrogatestyle Integer: Type of surrogate splits.
#' @param xval Integer: Number of cross-validation folds.
#' @param cost Numeric (>=0): One for each feature.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return CARTHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' cart_hyperparams <- setup_CART(cp = 0.01, maxdepth = 10L, ifw = TRUE)
#' cart_hyperparams
setup_CART <- function(
  # tunable
  cp = 0.01,
  maxdepth = 20L,
  minsplit = 2L,
  minbucket = 1L, # round(minsplit / 3),
  prune_cp = NULL,
  # fixed
  method = "auto",
  model = TRUE,
  maxcompete = 4L,
  maxsurrogate = 5L,
  usesurrogate = 2L,
  surrogatestyle = 0L,
  xval = 0L,
  cost = NULL,
  ifw = FALSE
) {
  check_inherits(cp, "numeric")
  maxdepth <- clean_int(maxdepth)
  minsplit <- clean_int(minsplit)
  minbucket <- clean_int(minbucket)
  check_inherits(prune_cp, "numeric")
  check_inherits(method, "character")
  check_inherits(model, "logical")
  maxcompete <- clean_int(maxcompete)
  maxsurrogate <- clean_int(maxsurrogate)
  usesurrogate <- clean_int(usesurrogate)
  surrogatestyle <- clean_int(surrogatestyle)
  xval <- clean_int(xval)
  check_inherits(cost, "numeric")
  CARTHyperparameters(
    cp = cp,
    maxdepth = maxdepth,
    minsplit = minsplit,
    minbucket = minbucket,
    prune_cp = prune_cp,
    method = method,
    model = model,
    maxcompete = maxcompete,
    maxsurrogate = maxsurrogate,
    usesurrogate = usesurrogate,
    surrogatestyle = surrogatestyle,
    xval = xval,
    cost = cost,
    ifw = ifw
  )
} # /rtemis::setup_CART

# Test that all CART hyperparameters are set by setup_CART
stopifnot(all(c(CART_tunable, CART_fixed) %in% names(formals(setup_CART))))


# %% GLMNETHyperparameters ----
GLMNET_tunable <- c("alpha", "ifw")
GLMNET_fixed <- c(
  "family",
  "offset",
  "which_lambda_cv",
  "nlambda",
  "penalty_factor",
  "standardize",
  "intercept"
)

#' @title GLMNETHyperparameters
#'
#' @description
#' Hyperparameters subclass for GLMNET.
#'
#' @author EDG
#' @keywords internal
#' @noRd
GLMNETHyperparameters <- new_class(
  name = "GLMNETHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    alpha,
    family,
    offset,
    which_lambda_cv,
    nlambda,
    lambda,
    penalty_factor,
    standardize,
    intercept,
    ifw
  ) {
    check_float01inc(alpha)
    check_inherits(which_lambda_cv, "character")
    nlambda <- clean_posint(nlambda)
    check_inherits(penalty_factor, "numeric")
    check_inherits(standardize, "logical")
    new_object(
      Hyperparameters(
        algorithm = "GLMNET",
        hyperparameters = list(
          alpha = alpha,
          family = family,
          offset = offset,
          which_lambda_cv = which_lambda_cv,
          nlambda = nlambda,
          lambda = lambda,
          penalty_factor = penalty_factor,
          standardize = standardize,
          intercept = intercept,
          ifw = ifw
        ),
        tunable_hyperparameters = GLMNET_tunable,
        fixed_hyperparameters = GLMNET_fixed
      )
    )
  } # /constructor
) # /rtemis::GLMNETHyperparameters

#' Setup GLMNET Hyperparameters
#'
#' Setup hyperparameters for GLMNET training.
#'
#' Get more information from [glmnet::glmnet].
#'
#' @param alpha (Tunable) Numeric: Mixing parameter.
#' @param family Character: Family for GLMNET.
#' @param offset Numeric: Offset for GLMNET.
#' @param which_lambda_cv Character: Which lambda to use for prediction:
#' "lambda.1se" or "lambda.min"
#' @param nlambda Positive integer: Number of lambda values.
#' @param lambda Numeric: Lambda values.
#' @param penalty_factor Numeric: Penalty factor for each feature.
#' @param standardize Logical: If TRUE, standardize features.
#' @param intercept Logical: If TRUE, include intercept.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return GLMNETHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' glm_hyperparams <- setup_GLMNET(alpha = 1, ifw = TRUE)
#' glm_hyperparams
setup_GLMNET <- function(
  # tunable
  alpha = 1,
  # fixed
  family = NULL,
  offset = NULL,
  which_lambda_cv = "lambda.1se",
  nlambda = 100L,
  lambda = NULL,
  penalty_factor = NULL,
  standardize = TRUE,
  intercept = TRUE,
  ifw = TRUE
) {
  check_float01inc(alpha)
  check_inherits(which_lambda_cv, "character")
  nlambda <- clean_posint(nlambda)
  check_inherits(penalty_factor, "numeric")
  check_logical(standardize)
  check_logical(ifw)
  GLMNETHyperparameters(
    family = family,
    offset = offset,
    alpha = alpha,
    which_lambda_cv = which_lambda_cv,
    nlambda = nlambda,
    lambda = lambda,
    penalty_factor = penalty_factor,
    standardize = standardize,
    intercept = intercept,
    ifw = ifw
  )
} # /rtemis::setup_GLMNET

# Test that all GLMNET hyperparameters are set by setup_GLMNET
stopifnot(all(
  c(GLMNET_tunable, GLMNET_fixed) %in% names(formals(setup_GLMNET))
))

method(get_hyperparams_need_tuning, GLMNETHyperparameters) <- function(x) {
  # Get tunable hyperparameters with more than one value
  out <- x@hyperparameters[x@tunable_hyperparameters[
    sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1
  ]]
  if (is.null(x[["lambda"]])) {
    out <- c(out, list(lambda = NULL))
  }
  out
} # /rtemis::get_hyperparams_need_tuning.GLMNETHyperparameters


# %% LightCARTHyperparameters ----
LightCART_tunable <- c(
  "num_leaves",
  "max_depth",
  "lambda_l1",
  "lambda_l2",
  "min_data_in_leaf",
  "max_cat_threshold",
  "min_data_per_group",
  "linear_tree",
  "ifw"
)
LightCART_fixed <- c("objective")

#' @title LightCARTHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightCART
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightCARTHyperparameters <- new_class(
  name = "LightCARTHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    num_leaves,
    max_depth,
    lambda_l1,
    lambda_l2,
    min_data_in_leaf,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    objective,
    ifw
  ) {
    new_object(
      Hyperparameters(
        algorithm = "LightCART",
        hyperparameters = list(
          num_leaves = num_leaves,
          max_depth = max_depth,
          lambda_l1 = lambda_l1,
          lambda_l2 = lambda_l2,
          min_data_in_leaf = min_data_in_leaf,
          max_cat_threshold = max_cat_threshold,
          min_data_per_group = min_data_per_group,
          linear_tree = linear_tree,
          objective = objective,
          ifw = ifw
        ),
        tunable_hyperparameters = LightCART_tunable,
        fixed_hyperparameters = LightCART_fixed
      )
    )
  } # /constructor
) # /rtemis::LightCARTHyperparameters


# %% setup_LightCART ----
#' Setup LightCART Hyperparameters
#'
#' Setup hyperparameters for LightCART training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param min_data_in_leaf (Tunable) Positive integer: Minimum number of data in a leaf.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree (Tunable) Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return LightCARTHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightcart_hyperparams <- setup_LightCART(num_leaves = 32L, ifw = FALSE)
#' lightcart_hyperparams
setup_LightCART <- function(
  num_leaves = 32L,
  max_depth = -1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  min_data_in_leaf = 20L,
  max_cat_threshold = 32L,
  min_data_per_group = 100L,
  linear_tree = FALSE,
  objective = NULL,
  ifw = FALSE
) {
  num_leaves <- clean_posint(num_leaves)
  max_depth <- clean_int(max_depth)
  check_float0pos(lambda_l1)
  check_float0pos(lambda_l2)
  min_data_in_leaf <- clean_posint(min_data_in_leaf)
  max_cat_threshold <- clean_posint(max_cat_threshold)
  min_data_per_group <- clean_posint(min_data_per_group)
  check_logical(linear_tree)
  LightCARTHyperparameters(
    num_leaves = num_leaves,
    max_depth = max_depth,
    lambda_l1 = lambda_l1,
    lambda_l2 = lambda_l2,
    min_data_in_leaf = min_data_in_leaf,
    max_cat_threshold = max_cat_threshold,
    min_data_per_group = min_data_per_group,
    linear_tree = linear_tree,
    objective = objective,
    ifw = ifw
  )
} # /rtemis::setup_LightCART


# %% LightRFHyperparameters ----
LightRF_tunable <- c(
  "nrounds",
  "num_leaves",
  "max_depth",
  "feature_fraction",
  "subsample",
  "lambda_l1",
  "lambda_l2",
  "max_cat_threshold",
  "min_data_per_group",
  "ifw"
)
LightRF_fixed <- c(
  "objective",
  "device_type",
  "tree_learner",
  "boosting_type",
  "learning_rate",
  "subsample_freq",
  "early_stopping_rounds",
  "force_col_wise"
)

#' @title LightRFHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightRF
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightRFHyperparameters <- new_class(
  name = "LightRFHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    nrounds,
    num_leaves,
    max_depth,
    feature_fraction,
    subsample,
    lambda_l1,
    lambda_l2,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    ifw,
    # fixed
    objective,
    device_type,
    tree_learner,
    force_col_wise
  ) {
    new_object(
      Hyperparameters(
        algorithm = "LightRF",
        hyperparameters = list(
          nrounds = nrounds,
          num_leaves = num_leaves,
          max_depth = max_depth,
          feature_fraction = feature_fraction,
          subsample = subsample,
          lambda_l1 = lambda_l1,
          lambda_l2 = lambda_l2,
          max_cat_threshold = max_cat_threshold,
          min_data_per_group = min_data_per_group,
          linear_tree = linear_tree,
          ifw = ifw,
          # fixed
          objective = objective,
          device_type = device_type,
          tree_learner = tree_learner,
          force_col_wise = force_col_wise,
          # unsettable: LightGBM params for RF
          boosting_type = "rf",
          learning_rate = 1, # no effect? in boosting_type 'rf', but set for clarity
          subsample_freq = 1L, # a.k.a. bagging_freq
          early_stopping_rounds = -1L
        ),
        tunable_hyperparameters = LightRF_tunable,
        fixed_hyperparameters = LightRF_fixed
      )
    )
  }
) # /rtemis::LightRFHyperparameters


# %% setup_LightRF ----
#' Setup LightRF Hyperparameters
#'
#' Setup hyperparameters for LightRF training.
#'
#' Get more information from [lightgbm::lgb.train].
#' Note that hyperparameters subsample_freq and early_stopping_rounds are fixed,
#' and cannot be set because they are what makes `lightgbm` train a random forest.
#' These can all be set when training gradient boosting with LightGBM.
#'
#' @param nrounds (Tunable) Positive integer: Number of boosting rounds.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param feature_fraction (Tunable) Numeric: Fraction of features to use.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#' @param device_type Character: "cpu" or "gpu".
#' @param tree_learner Character: "serial", "feature", "data", or "voting".
#' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building.
#'
#' @return LightRFHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightrf_hyperparams <- setup_LightRF(nrounds = 1000L, ifw = FALSE)
#' lightrf_hyperparams
setup_LightRF <- function(
  nrounds = 500L,
  num_leaves = 4096L,
  max_depth = -1L,
  feature_fraction = 0.7,
  subsample = .623, # a.k.a. bagging_fraction
  lambda_l1 = 0,
  lambda_l2 = 0,
  max_cat_threshold = 32L,
  min_data_per_group = 32L,
  linear_tree = FALSE,
  ifw = FALSE,
  # fixed
  objective = NULL,
  device_type = "cpu",
  tree_learner = "serial",
  force_col_wise = TRUE
) {
  nrounds <- clean_posint(nrounds)
  num_leaves <- clean_posint(num_leaves)
  max_depth <- clean_int(max_depth)
  check_float01inc(feature_fraction)
  check_float01inc(subsample)
  check_float0pos(lambda_l1)
  check_float0pos(lambda_l2)
  max_cat_threshold <- clean_posint(max_cat_threshold)
  min_data_per_group <- clean_posint(min_data_per_group)
  check_logical(linear_tree)
  LightRFHyperparameters(
    nrounds = nrounds,
    num_leaves = num_leaves,
    max_depth = max_depth,
    feature_fraction = feature_fraction,
    subsample = subsample,
    lambda_l1 = lambda_l1,
    lambda_l2 = lambda_l2,
    max_cat_threshold = max_cat_threshold,
    min_data_per_group = min_data_per_group,
    linear_tree = linear_tree,
    ifw = ifw,
    objective = objective,
    device_type = device_type,
    tree_learner = tree_learner,
    force_col_wise = force_col_wise
  )
} # /rtemis::setupLightRF

# Test that all LightRF hyperparameters are set by setup_LightRF
# LightRF fixed hyperparameters are not editable.
stopifnot(all(LightRF_tunable %in% names(formals(setup_LightRF))))


# %% LightGBMHyperparameters ----
LightGBM_tunable <- c(
  "num_leaves",
  "max_depth",
  "learning_rate",
  "feature_fraction",
  "subsample",
  "subsample_freq",
  "lambda_l1",
  "lambda_l2",
  "max_cat_threshold",
  "min_data_per_group",
  "linear_tree",
  "ifw"
)
LightGBM_fixed <- c(
  "max_nrounds",
  "force_nrounds",
  "early_stopping_rounds",
  "objective",
  "device_type",
  "tree_learner",
  "force_col_wise"
)


#' @title LightGBMHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightGBM
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightGBMHyperparameters <- new_class(
  name = "LightGBMHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    max_nrounds,
    force_nrounds,
    early_stopping_rounds,
    # tunable
    num_leaves,
    max_depth,
    learning_rate,
    feature_fraction,
    subsample,
    subsample_freq,
    lambda_l1,
    lambda_l2,
    max_cat_threshold,
    min_data_per_group,
    linear_tree,
    ifw,
    objective,
    device_type,
    tree_learner,
    force_col_wise
  ) {
    nrounds <- if (!is.null(force_nrounds)) {
      force_nrounds
    } else {
      NULL
    }
    new_object(
      Hyperparameters(
        algorithm = "LightGBM",
        hyperparameters = list(
          nrounds = nrounds,
          max_nrounds = max_nrounds,
          force_nrounds = force_nrounds,
          early_stopping_rounds = early_stopping_rounds,
          num_leaves = num_leaves,
          max_depth = max_depth,
          learning_rate = learning_rate,
          feature_fraction = feature_fraction,
          subsample = subsample,
          subsample_freq = subsample_freq,
          lambda_l1 = lambda_l1,
          lambda_l2 = lambda_l2,
          max_cat_threshold = max_cat_threshold,
          min_data_per_group = min_data_per_group,
          linear_tree = linear_tree,
          ifw = ifw,
          objective = objective,
          device_type = device_type,
          tree_learner = tree_learner,
          force_col_wise = force_col_wise
        ),
        tunable_hyperparameters = LightGBM_tunable,
        fixed_hyperparameters = LightGBM_fixed
      )
    )
  }
) # /rtemis::LightGBMHyperparameters

method(update, LightGBMHyperparameters) <- function(
  object,
  hyperparameters,
  tuned = NULL,
  ...
) {
  for (hp in names(hyperparameters)) {
    object@hyperparameters[[hp]] <- hyperparameters[[hp]]
  }
  # Update tuned status
  if (is.null(tuned)) {
    object@tuned <- get_tuned_status(object)
  } else {
    object@tuned <- tuned
  }
  # Update nrounds (e.g. in LightRuleFit)
  if (
    is.null(object@hyperparameters[["nrounds"]]) &&
      !is.null(object@hyperparameters[["force_nrounds"]])
  ) {
    object@hyperparameters[["nrounds"]] <- object@hyperparameters[[
      "force_nrounds"
    ]]
  }
  object
} # /update.LightGBMHyperparameters


# %% setup_LightGBM ----
# References:
# LightGBM parameters: https://lightgbm.readthedocs.io/en/latest/Parameters.html

#' Setup LightGBM Hyperparameters
#'
#' Setup hyperparameters for LightGBM training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param max_nrounds Positive integer: Maximum number of boosting rounds.
#' @param force_nrounds Positive integer: Use this many boosting rounds. Disable search for nrounds.
#' @param early_stopping_rounds Positive integer: Number of rounds without improvement to stop training.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param learning_rate (Tunable) Numeric: Learning rate.
#' @param feature_fraction (Tunable) Numeric: Fraction of features to use.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param subsample_freq (Tunable) Positive integer: Frequency of subsample.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param max_cat_threshold (Tunable) Positive integer: Maximum number of categories for categorical features.
#' @param min_data_per_group (Tunable) Positive integer: Minimum number of observations per categorical group.
#' @param linear_tree Logical: If TRUE, use linear trees.
#' @param objective Character: Objective function.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#' @param device_type Character: "cpu" or "gpu".
#' @param tree_learner Character: "serial", "feature", "data", or "voting".
#' @param force_col_wise Logical: Use only with CPU - If TRUE, force col-wise histogram building.
#'
#' @return LightGBMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightgbm_hyperparams <- setup_LightGBM(
#'   max_nrounds = 500L,
#'   learning_rate = c(0.001, 0.01, 0.05), ifw = TRUE
#' )
#' lightgbm_hyperparams
setup_LightGBM <- function(
  # nrounds will be auto-tuned if force_nrounds is NULL with a value up to max_nrounds and
  # using early_stopping_rounds.
  max_nrounds = 1000L,
  force_nrounds = NULL,
  early_stopping_rounds = 10L,
  # tunable
  num_leaves = 8L,
  max_depth = -1L,
  learning_rate = 0.01,
  feature_fraction = 1.0,
  subsample = 1.0, # a.k.a. bagging_fraction {check:hyper}
  subsample_freq = 1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  max_cat_threshold = 32L,
  min_data_per_group = 32L,
  linear_tree = FALSE,
  ifw = FALSE,
  objective = NULL,
  device_type = "cpu",
  tree_learner = "serial",
  force_col_wise = TRUE
) {
  max_nrounds <- clean_posint(max_nrounds)
  force_nrounds <- clean_posint(force_nrounds)
  early_stopping_rounds <- clean_posint(early_stopping_rounds)
  num_leaves <- clean_posint(num_leaves)
  max_depth <- clean_int(max_depth)
  check_floatpos1(learning_rate)
  check_floatpos1(feature_fraction)
  check_floatpos1(subsample)
  subsample_freq <- clean_posint(subsample_freq)
  check_float0pos(lambda_l1)
  check_float0pos(lambda_l2)
  max_cat_threshold <- clean_posint(max_cat_threshold)
  min_data_per_group <- clean_posint(min_data_per_group)
  check_logical(linear_tree)
  LightGBMHyperparameters(
    max_nrounds = max_nrounds,
    force_nrounds = force_nrounds,
    early_stopping_rounds = early_stopping_rounds,
    num_leaves = num_leaves,
    max_depth = max_depth,
    learning_rate = learning_rate,
    feature_fraction = feature_fraction,
    subsample = subsample,
    subsample_freq = subsample_freq,
    lambda_l1 = lambda_l1,
    lambda_l2 = lambda_l2,
    max_cat_threshold = max_cat_threshold,
    min_data_per_group = min_data_per_group,
    linear_tree = linear_tree,
    ifw = ifw,
    objective = objective,
    device_type = device_type,
    tree_learner = tree_learner,
    force_col_wise = force_col_wise
  )
} # /rtemis::setupLightGBM

# Test that all LightGBM hyperparameters are set by setup_LightGBM
stopifnot(all(
  c(LightGBM_tunable, LightGBM_fixed) %in% names(formals(setup_LightGBM))
))

method(get_hyperparams_need_tuning, LightGBMHyperparameters) <- function(x) {
  # Get tunable hyperparameters with more than one value
  out <- x@hyperparameters[x@tunable_hyperparameters[
    sapply(x@hyperparameters[x@tunable_hyperparameters], length) > 1
  ]]
  if (is.null(x[["nrounds"]])) {
    out <- c(out, list(nrounds = NULL))
  }
  out
} # /get_hyperparams_need_tuning.LightGBMHyperparameters


# %% LightRuleFitHyperparameters ----
LightRuleFit_tunable <- c(
  "nrounds",
  "num_leaves",
  "max_depth",
  "learning_rate",
  "subsample",
  "subsample_freq",
  "lambda_l1",
  "lambda_l2",
  "alpha",
  "ifw_lightgbm",
  "ifw_glmnet"
)
LightRuleFit_fixed <- c("lambda", "objective")
LightRuleFit_lightgbm_params <- c(
  "nrounds",
  "num_leaves",
  "max_depth",
  "learning_rate",
  "subsample",
  "subsample_freq",
  "lambda_l1",
  "lambda_l2",
  "objective"
)
LightRuleFit_glmnet_params <- c("alpha", "lambda")


#' @title LightRuleFitHyperparameters
#'
#' @description
#' Hyperparameters subclass for LightRuleFit.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LightRuleFitHyperparameters <- new_class(
  name = "LightRuleFitHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    nrounds,
    num_leaves,
    max_depth,
    learning_rate,
    subsample,
    subsample_freq,
    lambda_l1,
    lambda_l2,
    objective,
    ifw_lightgbm,
    # GLMNET
    alpha,
    lambda,
    ifw_glmnet,
    # IFW
    ifw
  ) {
    new_object(
      Hyperparameters(
        algorithm = "LightRuleFit",
        hyperparameters = list(
          nrounds = nrounds,
          num_leaves = num_leaves,
          max_depth = max_depth,
          learning_rate = learning_rate,
          subsample = subsample,
          subsample_freq = subsample_freq,
          lambda_l1 = lambda_l1,
          lambda_l2 = lambda_l2,
          objective = objective,
          ifw_lightgbm = ifw_lightgbm,
          # GLMNET
          alpha = alpha,
          lambda = lambda,
          ifw_glmnet = ifw_glmnet,
          # IFW
          ifw = ifw
        ),
        tunable_hyperparameters = LightRuleFit_tunable,
        fixed_hyperparameters = LightRuleFit_fixed
      )
    )
  }
) # /rtemis::LightRuleFitHyperparameters


# %% setup_LightRuleFit ----
#' Setup LightRuleFit Hyperparameters
#'
#' Setup hyperparameters for LightRuleFit training.
#'
#' Get more information from [lightgbm::lgb.train].
#'
#' @param nrounds (Tunable) Positive integer: Number of boosting rounds.
#' @param num_leaves (Tunable) Positive integer: Maximum number of leaves in one tree.
#' @param max_depth (Tunable) Integer: Maximum depth of trees.
#' @param learning_rate (Tunable) Numeric: Learning rate.
#' @param subsample (Tunable) Numeric: Fraction of data to use.
#' @param subsample_freq (Tunable) Positive integer: Frequency of subsample.
#' @param lambda_l1 (Tunable) Numeric: L1 regularization.
#' @param lambda_l2 (Tunable) Numeric: L2 regularization.
#' @param objective Character: Objective function.
#' @param ifw_lightgbm (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the LightGBM
#' step.
#' @param objective Character: Objective function.
#' @param alpha (Tunable) Numeric: Alpha for GLMNET.
#' @param lambda Numeric: Lambda for GLMNET.
#' @param ifw_glmnet (Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the GLMNET step.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification. This applies IFW
#' to both LightGBM and GLMNET.
#'
#' @return LightRuleFitHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' lightrulefit_hyperparams <- setup_LightRuleFit(nrounds = 300L, max_depth = 3L)
#' lightrulefit_hyperparams
setup_LightRuleFit <- function(
  nrounds = 200L,
  num_leaves = 32L,
  max_depth = 4L,
  learning_rate = 0.1,
  subsample = 0.666,
  subsample_freq = 1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  objective = NULL,
  ifw_lightgbm = FALSE,
  alpha = 1,
  lambda = NULL,
  ifw_glmnet = FALSE,
  ifw = FALSE
) {
  nrounds <- clean_posint(nrounds)
  num_leaves <- clean_posint(num_leaves)
  max_depth <- clean_int(max_depth)
  check_floatpos1(learning_rate)
  check_floatpos1(subsample)
  subsample_freq <- clean_posint(subsample_freq)
  check_inherits(lambda_l1, "numeric")
  check_inherits(lambda_l2, "numeric")
  check_float01inc(alpha)
  check_inherits(lambda, "numeric")
  check_logical(ifw_lightgbm)
  check_logical(ifw_glmnet)
  check_logical(ifw)
  # If ifw, cannot have ifw_lightgbm or ifw_glmnet
  if (ifw) {
    if (ifw_lightgbm) {
      cli::cli_abort("Cannot set ifw and ifw_lightgbm at the same time.")
    }
    if (ifw_glmnet) {
      cli::cli_abort("Cannot set ifw and ifw_glmnet at the same time.")
    }
  }
  LightRuleFitHyperparameters(
    nrounds = nrounds,
    num_leaves = num_leaves,
    max_depth = max_depth,
    learning_rate = learning_rate,
    subsample = subsample,
    subsample_freq = subsample_freq,
    lambda_l1 = lambda_l1,
    lambda_l2 = lambda_l2,
    objective = objective,
    ifw_lightgbm = ifw_lightgbm,
    alpha = alpha,
    lambda = lambda,
    ifw_glmnet = ifw_glmnet,
    ifw = ifw
  )
} # /rtemis::setup_LightRuleFit


# %% IsotonicHyperparameters ----
Isotonic_tunable <- character()
Isotonic_fixed <- character()

#' @title IsotonicHyperparameters
#'
#' @description
#' Hyperparameters subclass for Isotonic Regression.
#'
#' @author EDG
#' @keywords internal
#' @noRd
IsotonicHyperparameters <- new_class(
  name = "IsotonicHyperparameters",
  parent = Hyperparameters,
  constructor = function(ifw) {
    new_object(
      Hyperparameters(
        algorithm = "Isotonic",
        hyperparameters = list(
          ifw = ifw
        ),
        tunable_hyperparameters = "ifw",
        fixed_hyperparameters = Isotonic_fixed
      )
    )
  }
) # /rtemis::IsotonicHyperparameters


# %% setup_Isotonic ----
#' Setup Isotonic Hyperparameters
#'
#' Setup hyperparameters for Isotonic Regression.
#'
#' There are not hyperparameters for this algorithm at this moment.
#'
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return IsotonicHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' isotonic_hyperparams <- setup_Isotonic(ifw = TRUE)
#' isotonic_hyperparams
setup_Isotonic <- function(ifw = FALSE) {
  IsotonicHyperparameters(ifw = ifw)
} # /rtemis::setup_Isotonic


# %% SVMHyperparameters ----
#' @title SVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM.
#'
#' @author EDG
#' @keywords internal
#' @noRd
SVMHyperparameters <- new_class(
  name = "SVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    hyperparameters,
    tunable_hyperparameters,
    fixed_hyperparameters
  ) {
    new_object(
      Hyperparameters(
        algorithm = "SVM",
        hyperparameters = hyperparameters,
        tunable_hyperparameters = tunable_hyperparameters,
        fixed_hyperparameters = fixed_hyperparameters
      )
    )
  } # /constructor
) # /rtemis::SVMHyperparameters

# %% LinearSVMHyperparameters ----
LinearSVM_tunable <- c("cost", "ifw")
LinearSVM_fixed <- character()

#' @title LinearSVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM with linear kernel.
#'
#' @author EDG
#' @keywords internal
#' @noRd
LinearSVMHyperparameters <- new_class(
  name = "LinearSVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(cost, ifw) {
    new_object(
      Hyperparameters(
        algorithm = "LinearSVM",
        hyperparameters = list(
          kernel = "linear",
          cost = cost,
          ifw = ifw
        ),
        tunable_hyperparameters = c("cost", "ifw"),
        fixed_hyperparameters = character()
      )
    )
  } # /constructor
) # /rtemis::LinearSVMHyperparameters


# %% setup_LinearSVM ----
#' Setup LinearSVM Hyperparameters
#'
#' Setup hyperparameters for LinearSVM training.
#'
#' Get more information from [e1071::svm].
#' @param cost (Tunable) Numeric: Cost of constraints violation.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return LinearSVMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' linear_svm_hyperparams <- setup_LinearSVM(cost = 0.5, ifw = TRUE)
#' linear_svm_hyperparams
setup_LinearSVM <- function(
  cost = 1,
  ifw = FALSE
) {
  check_inherits(cost, "numeric")
  check_logical(ifw)
  LinearSVMHyperparameters(
    cost = cost,
    ifw = ifw
  )
} # /setup_LinearSVM

# Test that all SVM hyperparameters are set by setup_SVM
stopifnot(all(
  c(LinearSVM_tunable, LinearSVM_fixed) %in% names(formals(setup_LinearSVM))
))


# %% RadialSVMHyperparameters ----
RadialSVM_tunable <- c("cost", "gamma", "ifw")
RadialSVM_fixed <- character()

#' @title RadialSVMHyperparameters
#'
#' @description
#' Hyperparameters subclass for SVM with radial kernel.
#'
#' @author EDG
#' @keywords internal
#' @noRd
RadialSVMHyperparameters <- new_class(
  name = "RadialSVMHyperparameters",
  parent = Hyperparameters,
  constructor = function(cost, gamma, ifw) {
    new_object(
      Hyperparameters(
        algorithm = "RadialSVM",
        hyperparameters = list(
          kernel = "radial",
          cost = cost,
          gamma = gamma,
          ifw = ifw
        ),
        tunable_hyperparameters = c("cost", "gamma", "ifw"),
        fixed_hyperparameters = character()
      )
    )
  } # /constructor
) # /rtemis::RadialSVMHyperparameters


# %% setup_RadialSVM ----
#' Setup RadialSVM Hyperparameters
#'
#' Setup hyperparameters for RadialSVM training.
#'
#' Get more information from [e1071::svm].
#'
#' @param cost (Tunable) Numeric: Cost of constraints violation.
#' @param gamma (Tunable) Numeric: Kernel coefficient.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return RadialSVMHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' radial_svm_hyperparams <- setup_RadialSVM(cost = 10, gamma = 0.1, ifw = TRUE)
#' radial_svm_hyperparams
setup_RadialSVM <- function(
  cost = 1,
  gamma = 0.01,
  ifw = FALSE
) {
  check_inherits(cost, "numeric")
  check_inherits(gamma, "numeric")
  check_logical(ifw)
  RadialSVMHyperparameters(
    cost = cost,
    gamma = gamma,
    ifw = ifw
  )
} # /setup_RadialSVM


# Test that all SVM hyperparameters are set by setup_SVM
stopifnot(all(
  c(RadialSVM_tunable, RadialSVM_fixed) %in% names(formals(setup_RadialSVM))
))


# %% TabNetHyperparameters ----
tabnet_tunable <- c(
  "batch_size",
  "penalty",
  "clip_value",
  "loss",
  "epochs",
  "drop_last",
  "decision_width",
  "attention_width",
  "num_steps",
  "feature_reusage",
  "mask_type",
  "virtual_batch_size",
  "valid_split",
  "learn_rate",
  "optimizer",
  "lr_scheduler",
  "lr_decay",
  "step_size",
  "checkpoint_epochs",
  "cat_emb_dim",
  "num_independent",
  "num_shared",
  "num_independent_decoder",
  "num_shared_decoder",
  "momentum",
  "pretraining_ratio",
  "importance_sample_size",
  "early_stopping_monitor",
  "early_stopping_tolerance",
  "early_stopping_patience",
  "ifw"
)

tabnet_fixed <- c("device", "num_workers", "skip_importance")


#' @title TabNetHyperparameters
#'
#' @description
#' Hyperparameters subclass for TabNet.
#'
#' @author EDG
#' @keywords internal
#' @noRd
TabNetHyperparameters <- new_class(
  name = "TabNetHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    batch_size,
    penalty,
    clip_value,
    loss,
    epochs,
    drop_last,
    decision_width,
    attention_width,
    num_steps,
    feature_reusage,
    mask_type,
    virtual_batch_size,
    valid_split,
    learn_rate,
    optimizer,
    lr_scheduler,
    lr_decay,
    step_size,
    checkpoint_epochs,
    cat_emb_dim,
    num_independent,
    num_shared,
    num_independent_decoder,
    num_shared_decoder,
    momentum,
    pretraining_ratio,
    device,
    importance_sample_size,
    early_stopping_monitor,
    early_stopping_tolerance,
    early_stopping_patience,
    num_workers,
    skip_importance,
    ifw
  ) {
    new_object(
      Hyperparameters(
        algorithm = "TabNet",
        hyperparameters = list(
          batch_size = batch_size,
          penalty = penalty,
          clip_value = clip_value,
          loss = loss,
          epochs = epochs,
          drop_last = drop_last,
          decision_width = decision_width,
          attention_width = attention_width,
          num_steps = num_steps,
          feature_reusage = feature_reusage,
          mask_type = mask_type,
          virtual_batch_size = virtual_batch_size,
          valid_split = valid_split,
          learn_rate = learn_rate,
          optimizer = optimizer,
          lr_scheduler = lr_scheduler,
          lr_decay = lr_decay,
          step_size = step_size,
          checkpoint_epochs = checkpoint_epochs,
          cat_emb_dim = cat_emb_dim,
          num_independent = num_independent,
          num_shared = num_shared,
          num_independent_decoder = num_independent_decoder,
          num_shared_decoder = num_shared_decoder,
          momentum = momentum,
          pretraining_ratio = pretraining_ratio,
          device = device,
          importance_sample_size = importance_sample_size,
          early_stopping_monitor = early_stopping_monitor,
          early_stopping_tolerance = early_stopping_tolerance,
          early_stopping_patience = early_stopping_patience,
          num_workers = num_workers,
          skip_importance = skip_importance,
          ifw = ifw
        ),
        tunable_hyperparameters = tabnet_tunable,
        fixed_hyperparameters = tabnet_fixed
      )
    )
  } # /constructor
) # /rtemis::TabNetHyperparameters


# %% setup_TabNet ----
#' Setup TabNet Hyperparameters
#'
#' Setup hyperparameters for TabNet training.
#'
# Get more information from [tabnet::tabnet_config]
#'
#' @param batch_size (Tunable) Positive integer: Batch size.
#' @param penalty (Tunable) Numeric: Regularization penalty.
#' @param clip_value Numeric: Clip value.
#' @param loss Character: Loss function.
#' @param epochs (Tunable) Positive integer: Number of epochs.
#' @param drop_last Logical: If TRUE, drop last batch.
#' @param decision_width (Tunable) Positive integer: Decision width.
#' @param attention_width (Tunable) Positive integer: Attention width.
#' @param num_steps (Tunable) Positive integer: Number of steps.
#' @param feature_reusage (Tunable) Numeric: Feature reusage.
#' @param mask_type Character: Mask type.
#' @param virtual_batch_size (Tunable) Positive integer: Virtual batch size.
#' @param valid_split Numeric: Validation split.
#' @param learn_rate (Tunable) Numeric: Learning rate.
#' @param optimizer Character or torch function: Optimizer.
#' @param lr_scheduler Character or torch function: "step", "reduce_on_plateau".
#' @param lr_decay Numeric: Learning rate decay.
#' @param step_size Positive integer: Step size.
#' @param checkpoint_epochs (Tunable) Positive integer: Checkpoint epochs.
#' @param cat_emb_dim (Tunable) Positive integer: Categorical embedding dimension.
#' @param num_independent (Tunable) Positive integer: Number of independent Gated Linear Units (GLU)
#' at each step of the encoder.
#' @param num_shared (Tunable) Positive integer: Number of shared Gated Linear Units (GLU) at each
#' step of the encoder.
#' @param num_independent_decoder (Tunable) Positive integer: Number of independent GLU layers for
#' pretraining.
#' @param num_shared_decoder (Tunable) Positive integer: Number of shared GLU layers for
#' pretraining.
#' @param momentum (Tunable) Numeric: Momentum.
#' @param pretraining_ratio (Tunable) Numeric: Pretraining ratio.
#' @param device Character: Device "cpu" or "cuda".
#' @param importance_sample_size Positive integer: Importance sample size.
#' @param early_stopping_monitor Character: Early stopping monitor. "valid_loss", "train_loss",
#' "auto".
#' @param early_stopping_tolerance Numeric: Minimum relative improvement to reset the patience
#' counter.
#' @param early_stopping_patience Positive integer: Number of epochs without improving before
#' stopping.
#' @param num_workers Positive integer: Number of subprocesses for data loacding.
#' @param skip_importance Logical: If TRUE, skip importance calculation.
#' @param ifw Logical: If TRUE, use Inverse Frequency Weighting in classification.
#'
#' @return TabNetHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' tabnet_hyperparams <- setup_TabNet(epochs = 100L, learn_rate = 0.01)
#' tabnet_hyperparams
setup_TabNet <- function(
  batch_size = 1024^2,
  penalty = 0.001,
  clip_value = NULL,
  loss = "auto",
  epochs = 50L,
  drop_last = FALSE,
  decision_width = NULL,
  attention_width = NULL,
  num_steps = 3L,
  feature_reusage = 1.3,
  mask_type = "sparsemax",
  virtual_batch_size = 256^2,
  valid_split = 0,
  learn_rate = 0.02,
  optimizer = "adam",
  lr_scheduler = NULL,
  lr_decay = 0.1,
  step_size = 30,
  checkpoint_epochs = 10L,
  cat_emb_dim = 1L,
  num_independent = 2L,
  num_shared = 2L,
  num_independent_decoder = 1L,
  num_shared_decoder = 1L,
  momentum = 0.02,
  pretraining_ratio = 0.5,
  device = "auto",
  importance_sample_size = NULL,
  early_stopping_monitor = "auto",
  early_stopping_tolerance = 0,
  early_stopping_patience = 0,
  num_workers = 0L,
  skip_importance = FALSE,
  ifw = FALSE
) {
  TabNetHyperparameters(
    batch_size = batch_size,
    penalty = penalty,
    clip_value = clip_value,
    loss = loss,
    epochs = epochs,
    drop_last = drop_last,
    decision_width = decision_width,
    attention_width = attention_width,
    num_steps = num_steps,
    feature_reusage = feature_reusage,
    mask_type = mask_type,
    virtual_batch_size = virtual_batch_size,
    valid_split = valid_split,
    learn_rate = learn_rate,
    optimizer = optimizer,
    lr_scheduler = lr_scheduler,
    lr_decay = lr_decay,
    step_size = step_size,
    checkpoint_epochs = checkpoint_epochs,
    cat_emb_dim = cat_emb_dim,
    num_independent = num_independent,
    num_shared = num_shared,
    num_independent_decoder = num_independent_decoder,
    num_shared_decoder = num_shared_decoder,
    momentum = momentum,
    pretraining_ratio = pretraining_ratio,
    device = device,
    importance_sample_size = importance_sample_size,
    early_stopping_monitor = early_stopping_monitor,
    early_stopping_tolerance = early_stopping_tolerance,
    early_stopping_patience = early_stopping_patience,
    num_workers = num_workers,
    skip_importance = skip_importance,
    ifw = ifw
  )
} # /setup_TabNet

# Test that all TabNet hyperparameters are set by setup_TabNet
stopifnot(all(
  c(tabnet_tunable, tabnet_fixed) %in% names(formals(setup_TabNet))
))

get_tabnet_config <- function(hyperparameters) {
  check_is_S7(hyperparameters, TabNetHyperparameters)
  hpr <- hyperparameters@hyperparameters
  hpr[["ifw"]] <- NULL
  do.call(tabnet::tabnet_config, hpr)
} # /get_tabnet_config


# %% RangerHyperparameters ----
ranger_tunable <- c(
  "num_trees",
  "mtry",
  "min_node_size",
  "max_depth",
  "sample_fraction",
  "replace",
  "splitrule",
  "num_random_splits",
  "alpha",
  "minprop",
  "regularization_factor",
  "ifw"
)

ranger_fixed <- c(
  "importance",
  "write_forest",
  "probability",
  "min_bucket",
  "case_weights", # set by train
  "class_weights", # set by train
  "poisson_tau",
  "split_select_weights",
  "always_split_variables",
  "respect_unordered_factors",
  "scale_permutation_importance",
  "local_importance",
  "regularization_usedepth",
  "keep_inbag",
  "inbag",
  "holdout",
  "quantreg",
  "time_interest",
  "oob_error",
  "save_memory",
  "verbose",
  "node_stats",
  "seed",
  "na_action"
)


#' @title RangerHyperparameters
#'
#' @description
#' Hyperparameters subclass for Ranger Random Forest.
#'
#' @author EDG
#' @keywords internal
#' @noRd
RangerHyperparameters <- new_class(
  name = "RangerHyperparameters",
  parent = Hyperparameters,
  constructor = function(
    num_trees,
    mtry,
    importance,
    write_forest,
    probability,
    min_node_size,
    min_bucket,
    max_depth,
    replace,
    sample_fraction,
    case_weights,
    class_weights,
    splitrule,
    num_random_splits,
    alpha,
    minprop,
    poisson_tau,
    split_select_weights,
    always_split_variables,
    respect_unordered_factors,
    scale_permutation_importance,
    local_importance,
    regularization_factor,
    regularization_usedepth,
    keep_inbag,
    inbag,
    holdout,
    quantreg,
    time_interest,
    oob_error,
    save_memory,
    verbose,
    node_stats,
    seed,
    na_action,
    ifw
  ) {
    new_object(
      Hyperparameters(
        algorithm = "Ranger",
        hyperparameters = list(
          num_trees = num_trees,
          mtry = mtry,
          importance = importance,
          write_forest = write_forest,
          probability = probability,
          min_node_size = min_node_size,
          min_bucket = min_bucket,
          max_depth = max_depth,
          replace = replace,
          sample_fraction = sample_fraction,
          case_weights = case_weights,
          class_weights = class_weights,
          splitrule = splitrule,
          num_random_splits = num_random_splits,
          alpha = alpha,
          minprop = minprop,
          poisson_tau = poisson_tau,
          split_select_weights = split_select_weights,
          always_split_variables = always_split_variables,
          respect_unordered_factors = respect_unordered_factors,
          scale_permutation_importance = scale_permutation_importance,
          local_importance = local_importance,
          regularization_factor = regularization_factor,
          regularization_usedepth = regularization_usedepth,
          keep_inbag = keep_inbag,
          inbag = inbag,
          holdout = holdout,
          quantreg = quantreg,
          time_interest = time_interest,
          oob_error = oob_error,
          save_memory = save_memory,
          verbose = verbose,
          node_stats = node_stats,
          seed = seed,
          na_action = na_action,
          ifw = ifw
        ),
        tunable_hyperparameters = ranger_tunable,
        fixed_hyperparameters = ranger_fixed
      )
    )
  } # /constructor
) # /rtemis::RangerHyperparameters


# %% setup_Ranger ----
#' Setup Ranger Hyperparameters
#'
#' Setup hyperparameters for Ranger Random Forest training.
#'
#' Get more information from [ranger::ranger].
#'
#' @param num_trees (Tunable) Positive integer: Number of trees.
#' @param mtry (Tunable) Positive integer: Number of features to consider at each split.
#' @param importance Character: Variable importance mode. "none", "impurity", "impurity_corrected", "permutation".
#' The "impurity" measure is the Gini index for classification, the variance of the responses for regression.
#' @param write_forest Logical: Save ranger.forest object, required for prediction. Set to FALSE to reduce memory usage if no prediction intended.
#' @param probability Logical: Grow a probability forest as in Malley et al. (2012). For classification only.
#' @param min_node_size (Tunable) Positive integer: Minimal node size. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability.
#' @param min_bucket Positive integer: Minimal number of samples in a terminal node. Only for survival. Deprecated in favor of min_node_size.
#' @param max_depth (Tunable) Positive integer: Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree).
#' @param replace Logical: Sample with replacement.
#' @param sample_fraction (Tunable) Numeric: Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement.
#' @param case_weights Numeric vector: Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees.
#' @param class_weights Numeric vector: Weights for the outcome classes for classification. Vector of the same length as the number of classes, with names corresponding to the class labels.
#' @param splitrule (Tunable) Character: Splitting rule. For classification: "gini", "extratrees", "hellinger". For regression: "variance", "extratrees", "maxstat", "beta". For survival: "logrank", "extratrees", "C", "maxstat".
#' @param num_random_splits (Tunable) Positive integer: For "extratrees" splitrule: Number of random splits to consider for each candidate splitting variable.
#' @param alpha (Tunable) Numeric: For "maxstat" splitrule: significance threshold to allow splitting.
#' @param minprop (Tunable) Numeric: For "maxstat" splitrule: lower quantile of covariate distribution to be considered for splitting.
#' @param poisson_tau Numeric: For "poisson" regression splitrule: tau parameter for Poisson regression.
#' @param split_select_weights Numeric vector: Numeric vector with weights between 0 and 1, representing the probability to select variables for splitting. Alternatively, a list of size num_trees, with one weight vector per tree.
#' @param always_split_variables Character vector: Character vector with variable names to be always selected in addition to the mtry variables tried for splitting.
#' @param respect_unordered_factors Character or logical: Handling of unordered factor covariates. For "partition" all 2^(k-1)-1 possible partitions are considered for splitting, where k is the number of factor levels. For "ignore", all factor levels are ordered by their first occurrence in the data. For "order", all factor levels are ordered by their average response. TRUE corresponds to "partition" for the randomForest package compatibility.
#' @param scale_permutation_importance Logical: Scale permutation importance by standard error as in (Breiman 2001). Only applicable if permutation variable importance mode selected.
#' @param local_importance Logical: For permutation variable importance, use local importance as in Breiman (2001) and Liaw & Wiener (2002).
#' @param regularization_factor (Tunable) Numeric: Regularization factor. Penalize variables with many split points. Requires splitrule = "variance".
#' @param regularization_usedepth Logical: Use regularization factor with node depth. Requires regularization_factor.
#' @param keep_inbag Logical: Save how often observations are in-bag in each tree. These will be used for (local) variable importance if inbag.counts in predict() is NULL.
#' @param inbag List: Manually set observations per tree. List of size num_trees, containing inbag counts for each observation. Can be used for stratified sampling.
#' @param holdout Logical: Hold-out mode. Hold-out all samples with case weight 0 and use these for variable importance and prediction error.
#' @param quantreg Logical: Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). For regression only. Set keep_inbag = TRUE to prepare out-of-bag quantile prediction.
#' @param time_interest Numeric: For GWAS data: SNP with this number will be used as time variable. Only for survival. Deprecated, use time.var in formula instead.
#' @param oob_error Logical: Compute OOB prediction error. Set to FALSE to save computation time if only the forest is needed.
#' @param save_memory Logical: Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.
#' @param verbose Logical: Show computation status and estimated runtime.
#' @param node_stats Logical: Save additional node statistics. Only terminal nodes for now.
#' @param seed Positive integer: Random seed. Default is NULL, which generates the seed from R. Set to 0 to ignore the R seed.
#' @param na_action Character: Action to take if the data contains missing values. "na.learn" uses observations with missing values in splitting, treating missing values as a separate category.
#' @param ifw Logical: Inverse Frequency Weighting for classification. If TRUE, class weights are set inversely proportional to the class frequencies.
#'
#' @return RangerHyperparameters object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ranger_hyperparams <- setup_Ranger(num_trees = 1000L, ifw = FALSE)
#' ranger_hyperparams
setup_Ranger <- function(
  num_trees = 500,
  mtry = NULL,
  importance = "impurity",
  write_forest = TRUE,
  probability = FALSE,
  min_node_size = NULL,
  min_bucket = NULL,
  max_depth = NULL,
  replace = TRUE,
  sample_fraction = ifelse(replace, 1, 0.632),
  case_weights = NULL,
  class_weights = NULL,
  splitrule = NULL,
  num_random_splits = 1,
  alpha = 0.5,
  minprop = 0.1,
  poisson_tau = 1,
  split_select_weights = NULL,
  always_split_variables = NULL,
  respect_unordered_factors = NULL,
  scale_permutation_importance = FALSE,
  local_importance = FALSE,
  regularization_factor = 1,
  regularization_usedepth = FALSE,
  keep_inbag = FALSE,
  inbag = NULL,
  holdout = FALSE,
  quantreg = FALSE,
  time_interest = NULL,
  oob_error = TRUE,
  save_memory = FALSE,
  verbose = TRUE,
  node_stats = FALSE,
  seed = NULL,
  na_action = "na.learn",
  ifw = FALSE
) {
  num_trees <- clean_posint(num_trees)
  mtry <- clean_posint(mtry)
  check_inherits(importance, "character")
  check_inherits(write_forest, "logical")
  check_inherits(probability, "logical")
  min_node_size <- clean_posint(min_node_size)
  min_bucket <- clean_posint(min_bucket)
  max_depth <- clean_posint(max_depth)
  check_inherits(replace, "logical")
  check_float01inc(sample_fraction)
  check_inherits(case_weights, "numeric")
  check_inherits(class_weights, "numeric")
  check_inherits(splitrule, "character")
  num_random_splits <- clean_posint(num_random_splits)
  check_float01inc(alpha)
  check_float01inc(minprop)
  check_inherits(poisson_tau, "numeric")
  check_inherits(split_select_weights, "numeric")
  check_inherits(always_split_variables, "character")
  check_inherits(respect_unordered_factors, "logical")
  check_inherits(scale_permutation_importance, "logical")
  check_inherits(local_importance, "logical")
  check_inherits(regularization_factor, "numeric")
  check_inherits(regularization_usedepth, "logical")
  check_inherits(keep_inbag, "logical")
  check_inherits(inbag, "list")
  check_inherits(holdout, "logical")
  check_inherits(quantreg, "logical")
  check_inherits(time_interest, "numeric")
  check_inherits(oob_error, "logical")
  check_inherits(save_memory, "logical")
  check_inherits(verbose, "logical")
  check_inherits(node_stats, "logical")
  check_inherits(seed, "numeric")
  check_inherits(na_action, "character")
  check_logical(ifw)
  RangerHyperparameters(
    num_trees = num_trees,
    mtry = mtry,
    importance = importance,
    write_forest = write_forest,
    probability = probability,
    min_node_size = min_node_size,
    min_bucket = min_bucket,
    max_depth = max_depth,
    replace = replace,
    sample_fraction = sample_fraction,
    case_weights = case_weights,
    class_weights = class_weights,
    splitrule = splitrule,
    num_random_splits = num_random_splits,
    alpha = alpha,
    minprop = minprop,
    poisson_tau = poisson_tau,
    split_select_weights = split_select_weights,
    always_split_variables = always_split_variables,
    respect_unordered_factors = respect_unordered_factors,
    scale_permutation_importance = scale_permutation_importance,
    local_importance = local_importance,
    regularization_factor = regularization_factor,
    regularization_usedepth = regularization_usedepth,
    keep_inbag = keep_inbag,
    inbag = inbag,
    holdout = holdout,
    quantreg = quantreg,
    time_interest = time_interest,
    oob_error = oob_error,
    save_memory = save_memory,
    verbose = verbose,
    node_stats = node_stats,
    seed = seed,
    na_action = na_action,
    ifw = ifw
  )
} # /setup_Ranger

# Test that all Ranger hyperparameters are set by setup_Ranger
stopifnot(all(
  c(ranger_tunable, ranger_fixed) %in% names(formals(setup_Ranger))
))


# %% .list_to_Hyperparameters ----
#' Convert a list to a Hyperparameters object
#'
#' Internal function used by `rtemis.server` to reconstruct a `Hyperparameters`
#' object from a wire-format list. Not intended for direct use by end users.
#'
#' @param x Named list with two elements:
#'   \describe{
#'     \item{`algorithm`}{Character: algorithm name, e.g. `"GLM"`, `"RF"`.}
#'     \item{`hyperparameters`}{Named list of hyperparameter name-value pairs
#'       passed to the corresponding `setup_<algorithm>()` function.}
#'   }
#'
#' @return A `Hyperparameters` object as returned by `setup_<algorithm>()`.
#'
#' @author EDG
#' @keywords internal
#' @export
.list_to_Hyperparameters <- function(x) {
  fn <- paste0("setup_", x[["algorithm"]])
  if (!exists(fn, mode = "function")) {
    cli::cli_abort(".val Invalid algorithm: {x[['algorithm']]}.")
  }
  args <- x[["hyperparameters"]]
  # Keep only arguments that are in the setup function
  setup_formals <- names(formals(get(fn)))
  args <- args[names(args) %in% setup_formals]
  do.call(fn, args)
}


================================================
FILE: R/03_Metrics.R
================================================
# S7_Metrics.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% Metrics ----
#' @title Metrics
#'
#' @description
#' Superclass for Metrics metrics.
#'
#' @field sample Character: Sample name.
#' @field metrics List or data.frame: Metrics.
#'
#' @author EDG
#' @noRd
Metrics <- new_class(
  name = "Metrics",
  properties = list(
    sample = class_character | NULL,
    metrics = class_list | class_data.frame
  )
) # /rtemis::Metrics


# %% `$`.Metrics ----
# Make Metrics@metrics `$`-accessible
method(`$`, Metrics) <- function(x, name) {
  x@metrics[[name]]
}


# %% `.DollarNames`.Metrics ----
# `$`-autocomplete Metrics@metrics
method(`.DollarNames`, Metrics) <- function(x, pattern = "") {
  all_names <- names(x@metrics)
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.Metrics ----
# Make Metrics@metrics `[[`-accessible
method(`[[`, Metrics) <- function(x, name) {
  x@metrics[[name]]
}


# %% RegressionMetrics ----
#' @title RegressionMetrics
#'
#' @description
#' Metrics subclass for regression models.
#'
#' @author EDG
#' @noRd
RegressionMetrics <- new_class(
  name = "RegressionMetrics",
  parent = Metrics,
  # properties = list(
  #   MAE = class_numeric,
  #   MSE = class_numeric,
  #   RMSE = class_numeric,
  #   Rsq = class_numeric
  # ),
  constructor = function(MAE, MSE, RMSE, Rsq, sample = NULL) {
    new_object(
      Metrics(
        sample = sample,
        metrics = data.frame(
          MAE = MAE,
          MSE = MSE,
          RMSE = RMSE,
          Rsq = Rsq
        )
      )
    )
  }
) # /rtemis::RegressionMetrics


# %% repr.RegressionMetrics ----
# Show RegressionMetrics ----
method(repr, RegressionMetrics) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  out <- if (!is.null(x@sample)) {
    repr_S7name(
      paste(x@sample, "Regression Metrics"),
      pad = pad,
      output_type = output_type
    )
  } else {
    repr_S7name("Regression Metrics", pad = pad, output_type = output_type)
  }
  out <- paste0(
    out,
    repr_ls(
      x@metrics,
      print_class = FALSE,
      print_df = TRUE,
      pad = pad + 2L,
      output_type = output_type
    )
  )
  out
} # /rtemis::repr.RegressionMetrics


# %% print.RegressionMetrics ----
method(print, RegressionMetrics) <- function(
  x,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.RegressionMetrics


# %% ClassificationMetrics ----
#' @title ClassificationMetrics
#'
#' @description
#' Metrics subclass for classification models.
#'
#' @author EDG
#' @keywords internal
#' @noRd
ClassificationMetrics <- new_class(
  name = "ClassificationMetrics",
  parent = Metrics,
  constructor = function(
    Confusion_Matrix,
    Overall,
    Class,
    Positive_Class,
    sample = NULL
  ) {
    new_object(
      Metrics(
        sample = sample,
        metrics = list(
          Confusion_Matrix = Confusion_Matrix,
          Overall = Overall,
          Class = Class,
          Positive_Class = Positive_Class
        )
      )
    )
  }
) # /rtemis::ClassificationMetrics


# %% repr.ClassificationMetrics ----
method(repr, ClassificationMetrics) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL,
  ...
) {
  output_type <- get_output_type(output_type)

  if (!is.null(x@sample)) {
    out <- repr_S7name(
      paste(x@sample, "Classification Metrics"),
      pad = pad,
      output_type = output_type
    )
  } else {
    out <- repr_S7name(
      "Classification Metrics",
      pad = pad,
      output_type = output_type
    )
  }
  # Confusion Matrix
  # suggestion: document 17 and 9
  tblpad <- 17L -
    max(nchar(colnames(x@metrics[["Confusion_Matrix"]])), 9L) +
    pad
  out <- paste0(
    out,
    show_table(x[["Confusion_Matrix"]], pad = tblpad, output_type = output_type)
  )
  out <- paste0(
    out,
    "\n",
    show_df(
      x@metrics[["Overall"]],
      pad = pad,
      transpose = TRUE,
      ddSci_dp = decimal_places,
      justify = "left",
      spacing = 2L,
      output_type = output_type
    )
  )

  if (is.na(x@metrics[["Positive_Class"]])) {
    out <- paste0(
      out,
      show_df(
        x@metrics[["Class"]],
        pad = pad,
        transpose = TRUE,
        ddSci_dp = decimal_places,
        justify = "left",
        spacing = 2,
        output_type = output_type
      )
    )
  } else {
    out <- paste0(
      out,
      "\n     Positive Class ",
      fmt(
        x@metrics[["Positive_Class"]],
        col = highlight_col,
        bold = TRUE,
        output_type = output_type
      ),
      "\n"
    )
  }
  out
} # /rtemis::repr.ClassificationMetrics


# %% print.ClassificationMetrics ----
method(print, ClassificationMetrics) <- function(
  x,
  decimal_places = 3,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  cat(repr(
    x,
    decimal_places = decimal_places,
    pad = pad,
    output_type = output_type
  ))
  invisible(x)
} # /rtemis::print.ClassificationMetrics


# %% MetricsRes ----
#' @title MetricsRes
#'
#' @description
#' Superclass for MetricsRes metrics.
#'
#' @field sample Character: Sample name.
#'
#' @author EDG
#' @noRd
MetricsRes <- new_class(
  name = "MetricsRes",
  properties = list(
    sample = class_character | NULL,
    res_metrics = class_list,
    mean_metrics = class_data.frame,
    sd_metrics = class_data.frame
  )
) # /rtemis::MetricsRes


# %% repr.MetricsRes ----
method(repr, MetricsRes) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  type <- if (S7_inherits(x, RegressionMetricsRes)) {
    "Regression"
  } else {
    "Classification"
  }
  out <- repr_S7name(
    paste("Resampled", type, x@sample, "Metrics"),
    pad = pad,
    output_type = output_type
  )
  out <- paste0(out, strrep(" ", pad))
  out <- paste0(
    out,
    italic("  Showing mean (sd) across resamples.\n", output_type = output_type)
  )
  # Create list with mean_metrics (sd_metrics)
  metricsl <- lapply(seq_along(x@mean_metrics), function(i) {
    paste0(
      ddSci(x@mean_metrics[[i]], decimal_places),
      gray(
        paste0(" (", ddSci(x@sd_metrics[[i]], decimal_places), ")"),
        output_type = output_type
      )
    )
  })
  names(metricsl) <- names(x@mean_metrics)
  out <- paste0(
    out,
    repr_ls(
      metricsl,
      print_class = FALSE,
      print_df = TRUE,
      pad = pad + 2L,
      output_type = output_type
    )
  )
  out
} # /rtemis::repr.MetricsRes


# %% print.MetricsRes ----
method(print, MetricsRes) <- function(
  x,
  decimal_places = 3L,
  pad = 0L,
  output_type = NULL,
  ...
) {
  cat(repr(x, decimal_places, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.MetricsRes


# %% RegressionMetricsRes ----
#' @author EDG
#' @noRd
RegressionMetricsRes <- new_class(
  name = "RegressionMetricsRes",
  parent = MetricsRes,
  constructor = function(sample, res_metrics) {
    new_object(
      MetricsRes(
        sample = sample,
        res_metrics = res_metrics,
        mean_metrics = vec2df(
          colMeans(do.call(rbind, lapply(res_metrics, function(x) x@metrics)))
        ),
        sd_metrics = vec2df(
          sapply(do.call(rbind, lapply(res_metrics, function(x) x@metrics)), sd)
        )
      )
    )
  }
) # /rtemis::RegressionMetricsRes


#' @author EDG
#' @noRd
ClassificationMetricsRes <- new_class(
  name = "ClassificationMetricsRes",
  parent = MetricsRes,
  constructor = function(sample, res_metrics) {
    new_object(
      MetricsRes(
        sample = sample,
        res_metrics = res_metrics,
        mean_metrics = vec2df(
          colMeans(do.call(
            rbind,
            lapply(res_metrics, function(x) x@metrics[["Overall"]])
          ))
        ),
        sd_metrics = vec2df(
          sapply(
            do.call(
              rbind,
              lapply(res_metrics, function(x) x@metrics[["Overall"]])
            ),
            sd
          )
        )
      )
    )
  }
) # /rtemis::ClassificationMetricsRes


# %% repr.CalibratedClassification ----
#' @param x `ClassificationMetrics` before calibration.
#' @param x_cal `ClassificationMetrics` after calibration.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
repr_CalibratedClassificationMetrics <- function(
  x,
  x_cal,
  decimal_places = 2L,
  pad = 2L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)

  if (!is.null(x@sample)) {
    out <- repr_S7name(
      paste(x@sample, "Classification Metrics (Pre => Post Calibration)"),
      pad = pad,
      output_type = output_type
    )
  } else {
    out <- repr_S7name(
      "Classification Metrics (Pre => Post Calibration)",
      pad = pad,
      output_type = output_type
    )
  }

  # Confusion Matrix: Pre=>Post
  prepost_cm <- paste_tables(
    x@metrics[["Confusion_Matrix"]],
    x_cal@metrics[["Confusion_Matrix"]],
    sep = " => "
  )
  tblpad <- 17L -
    max(nchar(colnames(prepost_cm)), 9L) +
    pad
  out <- paste0(
    out,
    show_table(prepost_cm, pad = tblpad, output_type = output_type)
  )

  # Overall metrics: Pre=>Post
  # Note: decimal formatting handled by paste_dfs with decimal_places parameter
  out <- paste0(
    out,
    "\n",
    show_df(
      paste_dfs(
        x@metrics[["Overall"]],
        x_cal@metrics[["Overall"]],
        sep = " => ",
        decimal_places = decimal_places
      ),
      pad = pad,
      transpose = TRUE,
      ddSci_dp = NULL,
      justify = "left",
      spacing = 2L,
      output_type = output_type
    )
  )

  # Class metrics: Pre=>Post (for multiclass) or Positive Class (for binary)
  if (is.na(x@metrics[["Positive_Class"]])) {
    out <- paste0(
      out,
      show_df(
        paste_dfs(
          x@metrics[["Class"]],
          x_cal@metrics[["Class"]],
          decimal_places = decimal_places
        ),
        pad = pad,
        transpose = TRUE,
        ddSci_dp = NULL,
        justify = "left",
        spacing = 2,
        output_type = output_type
      )
    )
  } else {
    out <- paste0(
      out,
      "\n     Positive Class ",
      fmt(
        x@metrics[["Positive_Class"]],
        col = highlight_col,
        bold = TRUE,
        output_type = output_type
      ),
      "\n"
    )
  }
  out
} # /rtemis::repr_CalibratedClassification


# %% repr.CalibratedClassificationResMetrics ----
#' @param x `ClassificationMetricsRes` before calibration.
#' @param x_cal `ClassificationMetricsRes` after calibration.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
repr_CalibratedClassificationResMetrics <- function(
  x,
  x_cal,
  decimal_places = 2L,
  pad = 2L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)

  out <- repr_S7name(
    paste(
      "Resampled Classification",
      x@sample,
      "Metrics (Pre => Post Calibration)"
    ),
    pad = pad,
    output_type = output_type
  )
  out <- paste0(out, strrep(" ", pad))
  out <- paste0(
    out,
    italic(
      "  Showing mean (sd) across resamples, Pre => Post calibration.\n",
      output_type = output_type
    )
  )

  # Create pre and post formatted strings: mean (sd)
  pre_strings <- lapply(seq_along(x@mean_metrics), function(i) {
    paste0(
      ddSci(x@mean_metrics[[i]], decimal_places),
      gray(
        paste0(" (", ddSci(x@sd_metrics[[i]], decimal_places), ")"),
        output_type = output_type
      )
    )
  })
  names(pre_strings) <- names(x@mean_metrics)

  post_strings <- lapply(seq_along(x_cal@mean_metrics), function(i) {
    paste0(
      ddSci(x_cal@mean_metrics[[i]], decimal_places),
      gray(
        paste0(" (", ddSci(x_cal@sd_metrics[[i]], decimal_places), ")"),
        output_type = output_type
      )
    )
  })
  names(post_strings) <- names(x_cal@mean_metrics)

  # Combine pre=>post
  prepost_strings <- lapply(seq_along(pre_strings), function(i) {
    paste(pre_strings[[i]], post_strings[[i]], sep = " => ")
  })
  names(prepost_strings) <- names(pre_strings)

  out <- paste0(
    out,
    repr_ls(
      prepost_strings,
      print_class = FALSE,
      print_df = TRUE,
      pad = pad + 2L,
      output_type = output_type
    )
  )
  out
} # /rtemis::repr_CalibratedClassificationResMetrics


================================================
FILE: R/04_Preprocessor.R
================================================
# S7_Preprocessor.R
# ::rtemis::
# 2025- EDG rtemis.org

# References
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7

# %% PreprocessorConfig ----
#' @title PreprocessorConfig
#'
#' @description
#' PreprocessorConfig class.
#'
#' @author EDG
#' @noRd
PreprocessorConfig <- new_class(
  name = "PreprocessorConfig",
  properties = list(
    complete_cases = class_logical,
    remove_features_thres = class_numeric | NULL,
    remove_cases_thres = class_numeric | NULL,
    missingness = class_logical,
    impute = class_logical,
    impute_type = class_character,
    impute_missRanger_params = class_list,
    impute_discrete = class_character,
    impute_continuous = class_character,
    integer2factor = class_logical,
    integer2numeric = class_logical,
    logical2factor = class_logical,
    logical2numeric = class_logical,
    numeric2factor = class_logical,
    numeric2factor_levels = class_character | NULL,
    numeric_cut_n = class_numeric,
    numeric_cut_labels = class_logical,
    numeric_quant_n = class_numeric,
    numeric_quant_NAonly = class_logical,
    unique_len2factor = class_numeric,
    character2factor = class_logical,
    factorNA2missing = class_logical,
    factorNA2missing_level = class_character,
    factor2integer = class_logical,
    factor2integer_startat0 = class_logical,
    scale = class_logical,
    center = class_logical,
    scale_centers = class_numeric | NULL,
    scale_coefficients = class_numeric | NULL,
    remove_constants = class_logical,
    remove_constants_skip_missing = class_logical,
    remove_duplicates = class_logical,
    remove_features = class_character | NULL,
    one_hot = class_logical,
    one_hot_levels = class_list | NULL,
    add_date_features = class_logical,
    date_features = class_character,
    add_holidays = class_logical,
    exclude = class_character | NULL
  )
) # /PreprocessorConfig


# %% names.PreprocessorConfig ----
# Names PreprocessorConfig
method(names, PreprocessorConfig) <- function(x) {
  names(props(x))
}


# %% `$`.PreprocessorConfig ----
# Make props `$`-accessible
method(`$`, PreprocessorConfig) <- function(x, name) {
  props(x)[[name]]
}


# %% `.DollarNames`.PreprocessorConfig ----
# DollarSign tab-complete property names
method(`.DollarNames`, PreprocessorConfig) <- function(x, pattern = "") {
  all_names <- names(props(x))
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.PreprocessorConfig ----
# Make proprs `[[`-accessible
method(`[[`, PreprocessorConfig) <- function(x, name) {
  props(x)[[name]]
}


# %% repr.PreprocessorConfig ----
method(repr, PreprocessorConfig) <- function(
  x,
  limit = -1L,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name("PreprocessorConfig", pad = pad, output_type = output_type),
    repr_ls(props(x), pad = pad, limit = limit, output_type = output_type)
  )
} # /rtemis::repr.PreprocessorConfig


# %% print.PreprocessorConfig ----
method(print, PreprocessorConfig) <- function(
  x,
  limit = -1L,
  output_type = NULL,
  ...
) {
  cat(repr(x, limit = limit, output_type = output_type))
  invisible(x)
} # /rtemis::print.PreprocessorConfig


# %% setup_Preprocessor ----
#' Setup Preprocessor
#'
#' @description
#' Creates a `PreprocessorConfig` object, which can be used in [preprocess].
#'
#' @param complete_cases Logical: If TRUE, only retain complete cases (no missing data).
#' @param remove_cases_thres Float (0, 1): Remove cases with >= to this fraction
#' of missing features.
#' @param remove_features_thres Float (0, 1): Remove features with missing
#' values in >= to this fraction of cases.
#' @param missingness Logical: If TRUE, generate new boolean columns for each
#' feature with missing values, indicating which cases were missing data.
#' @param impute Logical: If TRUE, impute missing cases. See `impute_discrete` and
#' `impute_continuous`.
#' @param impute_type Character: Package to use for imputation.
#' @param impute_missRanger_params Named list with elements "pmm.k" and
#' "maxiter", which are passed to `missRanger::missRanger`. `pmm.k`
#' greater than 0 results in predictive mean matching. Default `pmm.k = 3`
#' `maxiter = 10` `num.trees = 500`. Reduce `num.trees` for
#' faster imputation especially in large datasets. Set `pmm.k = 0` to
#' disable predictive mean matching.
#' @param impute_discrete Character: Name of function that returns single value: How to impute
#' discrete variables for `impute_type = "meanMode"`.
#' @param impute_continuous Character: Name of function that returns single value: How to impute
#' continuous variables for `impute_type = "meanMode"`.
#' @param integer2factor Logical: If TRUE, convert all integers to factors. This includes
#' `bit64::integer64` columns.
#' @param integer2numeric Logical: If TRUE, convert all integers to numeric
#' (will only work if `integer2factor = FALSE`). This includes
#' `bit64::integer64` columns.
#' @param logical2factor Logical: If TRUE, convert all logical variables to
#' factors.
#' @param logical2numeric Logical: If TRUE, convert all logical variables to
#' numeric.
#' @param numeric2factor Logical: If TRUE, convert all numeric variables to
#' factors.
#' @param numeric2factor_levels Character vector: Optional - will be passed to
#' `levels` arg of `factor()` if `numeric2factor = TRUE`. For advanced/
#' specific use cases; need to know unique values of numeric vector(s) and given all
#' numeric vars have same unique values.
#' @param numeric_cut_n Integer: If > 0, convert all numeric variables to factors by
#' binning using `base::cut` with `breaks` equal to this number.
#' @param numeric_cut_labels Logical: The `labels` argument of [base::cut].
#' @param numeric_quant_n Integer: If > 0, convert all numeric variables to factors by
#' binning using `base::cut` with `breaks` equal to this number of quantiles.
#' produced using `stats::quantile`.
#' @param numeric_quant_NAonly Logical: If TRUE, only bin numeric variables with
#' missing values.
#' @param unique_len2factor Integer (>=2): Convert all variables with less
#' than or equal to this number of unique values to factors.
#' For example, if binary variables are encoded with 1, 2, you could use
#' `unique_len2factor = 2` to convert them to factors.
#' @param character2factor Logical: If TRUE, convert all character variables to
#' factors.
#' @param factorNA2missing Logical: If TRUE, make NA values in factors be of
#' level `factorNA2missing_level`. In many cases this is the preferred way
#' to handle missing data in categorical variables. Note that since this step
#' is performed before imputation, you can use this option to handle missing
#' data in categorical variables and impute numeric variables in the same
#' `preprocess` call.
#' @param factorNA2missing_level Character: Name of level if
#' `factorNA2missing = TRUE`.
#' @param factor2integer Logical: If TRUE, convert all factors to integers.
#' @param factor2integer_startat0 Logical: If TRUE, start integer coding at 0.
#' @param scale Logical: If TRUE, scale columns of `x`.
#' @param center Logical: If TRUE, center columns of `x`. Note that by
#' default it is the same as `scale`.
#' @param scale_centers Named vector: Centering values for each feature.
#' @param scale_coefficients Named vector: Scaling values for each feature.
#' @param remove_constants Logical: If TRUE, remove constant columns.
#' @param remove_constants_skip_missing Logical: If TRUE, skip missing values, before
#' checking if feature is constant.
#' @param remove_features Character vector: Features to remove.
#' @param remove_duplicates Logical: If TRUE, remove duplicate cases.
#' @param one_hot Logical: If TRUE, convert all factors using one-hot encoding.
#' @param one_hot_levels List: Named list of the form "feature_name" = "levels". Used when applying
#' one-hot encoding to validation or test data using `Preprocessor`.
#' @param add_date_features Logical: If TRUE, extract date features from date columns.
#' @param date_features Character vector: Features to extract from dates.
#' @param add_holidays Logical: If TRUE, extract holidays from date columns.
#' @param exclude Integer, vector: Exclude these columns from preprocessing.
#'
#' @section Order of Operations:
#'
#'   * keep complete cases only
#'   * remove constants
#'   * remove duplicates
#'   * remove cases by missingness threshold
#'   * remove features by missingness threshold
#'   * integer to factor
#'   * integer to numeric
#'   * logical to factor
#'   * logical to numeric
#'   * numeric to factor
#'   * cut numeric to n bins
#'   * cut numeric to n quantiles
#'   * numeric with less than N unique values to factor
#'   * character to factor
#'   * factor NA to named level
#'   * add missingness column
#'   * impute
#'   * scale and/or center
#'   * one-hot encoding
#'
#' @return `PreprocessorConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' preproc_config <- setup_Preprocessor(factorNA2missing = TRUE)
#' preproc_config
setup_Preprocessor <- function(
  complete_cases = FALSE,
  remove_features_thres = NULL,
  remove_cases_thres = NULL,
  missingness = FALSE,
  impute = FALSE,
  impute_type = c(
    "missRanger",
    "micePMM",
    "meanMode"
  ),
  impute_missRanger_params = list(
    pmm.k = 3,
    maxiter = 10,
    num.trees = 500
  ),
  impute_discrete = "get_mode",
  impute_continuous = "mean",
  integer2factor = FALSE,
  integer2numeric = FALSE,
  logical2factor = FALSE,
  logical2numeric = FALSE,
  numeric2factor = FALSE,
  numeric2factor_levels = NULL,
  numeric_cut_n = 0,
  numeric_cut_labels = FALSE,
  numeric_quant_n = 0,
  numeric_quant_NAonly = FALSE,
  unique_len2factor = 0,
  character2factor = FALSE,
  factorNA2missing = FALSE,
  factorNA2missing_level = "missing",
  #    nonzeroFactors = FALSE,
  factor2integer = FALSE,
  factor2integer_startat0 = TRUE,
  scale = FALSE,
  center = scale,
  scale_centers = NULL,
  scale_coefficients = NULL,
  remove_constants = FALSE,
  remove_constants_skip_missing = TRUE,
  remove_features = NULL,
  remove_duplicates = FALSE,
  one_hot = FALSE,
  one_hot_levels = NULL,
  #    cleanfactorlevels = FALSE,
  add_date_features = FALSE,
  date_features = c("weekday", "month", "year"),
  add_holidays = FALSE,
  exclude = NULL
) {
  # Match args
  impute_type <- match.arg(impute_type)
  # Checks performed in the `PreprocessorConfig` constructor
  PreprocessorConfig(
    complete_cases = complete_cases,
    remove_features_thres = remove_features_thres,
    remove_cases_thres = remove_cases_thres,
    missingness = missingness,
    impute = impute,
    impute_type = impute_type,
    impute_missRanger_params = impute_missRanger_params,
    impute_discrete = impute_discrete,
    impute_continuous = impute_continuous,
    integer2factor = integer2factor,
    integer2numeric = integer2numeric,
    logical2factor = logical2factor,
    logical2numeric = logical2numeric,
    numeric2factor = numeric2factor,
    numeric2factor_levels = numeric2factor_levels,
    numeric_cut_n = numeric_cut_n,
    numeric_cut_labels = numeric_cut_labels,
    numeric_quant_n = numeric_quant_n,
    numeric_quant_NAonly = numeric_quant_NAonly,
    unique_len2factor = unique_len2factor,
    character2factor = character2factor,
    factorNA2missing = factorNA2missing,
    factorNA2missing_level = factorNA2missing_level,
    factor2integer = factor2integer,
    factor2integer_startat0 = factor2integer_startat0,
    scale = scale,
    center = center,
    scale_centers = scale_centers,
    scale_coefficients = scale_coefficients,
    remove_constants = remove_constants,
    remove_constants_skip_missing = remove_constants_skip_missing,
    remove_features = remove_features,
    remove_duplicates = remove_duplicates,
    one_hot = one_hot,
    one_hot_levels = one_hot_levels,
    add_date_features = add_date_features,
    date_features = date_features,
    add_holidays = add_holidays,
    exclude = exclude
  )
} # /setup_Preprocessor

# Note:
# data_dependent_props <- c(
#   "scale_centers", # Named vector with feature scaling centers.
#   "scale_coefficients", # Named vector with feature scaling coefficients.
#   "one_hot_levels", # Named list of the form "feature_name" = "levels".
#   "remove_features" # Character vector of feature names to remove.
# )

# %% Preprocessor ----
#' @title Preprocessor
#'
#' @description
#' Class to hold output of preprocessing values after applying `PreprocessorConfig` to
#' training dataset, so that the same preprocessing can be applied to validation and test
#' datasets.
#'
#' @field config `PreprocessorConfig` object.
#' @field preprocessed Data frame or list: Preprocessed data. If a single data.frame is passed to
#' `preprocess`, this will be a data.frame. If additional data sets are passed to the
#' `dat_validation` and/or `dat_test` arguments, this will be a named list.
#' @field values List: Data-dependent preprocessing values to be used for validation and test set
#' preprocessing.
#'
#' @author EDG
#' @noRd
Preprocessor <- new_class(
  name = "Preprocessor",
  properties = list(
    config = PreprocessorConfig,
    preprocessed = class_data.frame | class_list,
    values = class_list
  ),
  constructor = function(
    config,
    preprocessed,
    scale_centers = NULL,
    scale_coefficients = NULL,
    one_hot_levels = NULL,
    remove_features = NULL
  ) {
    new_object(
      S7_object(),
      config = config,
      preprocessed = preprocessed,
      values = list(
        scale_centers = scale_centers,
        scale_coefficients = scale_coefficients,
        one_hot_levels = one_hot_levels,
        remove_features = remove_features
      )
    )
  }
) # /Preprocessor


# %% repr.Preprocessor ----
method(repr, Preprocessor) <- function(
  x,
  pad = 0L,
  print_df = FALSE,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name("Preprocessor", pad = pad, output_type = output_type),
    repr_ls(props(x), pad = pad, print_df = print_df)
  )
} # /rtemis::repr.Preprocessor


# %% print.Preprocessor ----
method(print, Preprocessor) <- function(x, pad = 0L, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.Preprocessor


# %% names.Preprocessor ----
method(names, Preprocessor) <- function(x) {
  names(props(x))
}


# %% `$`.Preprocessor ----
# Make props `$`-accessible
method(`$`, Preprocessor) <- function(x, name) {
  props(x)[[name]]
}


# %% `.DollarNames`.Preprocessor ----
# DollarSign tab-complete property names
method(`.DollarNames`, Preprocessor) <- function(x, pattern = "") {
  all_names <- names(props(x))
  grep(pattern, all_names, value = TRUE)
}


# %% `[`.Preprocessor ----
# Make props `[`-accessible
method(`[`, Preprocessor) <- function(x, name) {
  props(x)[[name]]
}


# %% `[[`.Preprocessor ----
# Make props `[[`-accessible
method(`[[`, Preprocessor) <- function(x, name) {
  props(x)[[name]]
}


# %% preprocessed.Preprocessor ----
method(preprocessed, Preprocessor) <- function(x) {
  x@preprocessed
}


================================================
FILE: R/05_Resampler.R
================================================
# S7_Resampler.R
# ::rtemis::
# 2025- EDG rtemis.org

# References
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7

# Description
# `ResamplerConfig` class and subclasses create objects that store resampling configuration.
# They are set by `setup_Resampler()` and perform type checking and validation.
# They are used by `resample()`.
# `Resampler` class stores resamples and their configuration.
# `Resampler` objects are created by `resample()`.

# Note: `id_strat` is used by `resample()`, not individual resamplers

# %% ResamplerConfig ----
#' @title ResamplerConfig
#'
#' @description
#' Superclass for resampler configuration.
#'
#' @field type Character: Type of resampler.
#' @field n Integer: Number of resamples.
#'
#' @author EDG
#' @noRd
ResamplerConfig <- new_class(
  name = "ResamplerConfig",
  properties = list(
    type = class_character,
    n = class_integer # scalar_int_pos
  ),
  constructor = function(type, n) {
    # LOOCV does not have a defined number of resamples, so n can be NA_integer_
    n <- clean_posint(n, allow_na = TRUE)
    new_object(
      S7_object(),
      type = type,
      n = n
    )
  }
) # /rtemis::ResamplerConfig


# %% `$`.ResamplerConfig ----
# Make S7 properties `$`-accessible
method(`$`, ResamplerConfig) <- function(x, name) {
  prop(x, name)
}


# %% `[[`.ResamplerConfig ----
# Make S7 properties `[[`-accessible
method(`[[`, ResamplerConfig) <- function(x, name) {
  prop(x, name)
}


# %% repr.ResamplerConfig ----
#' repr ResamplerConfig
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(repr, ResamplerConfig) <- function(x, pad = 0L, output_type = NULL) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(x, pad = pad, output_type = output_type),
    repr_ls(
      props(x)[-1],
      pad = pad,
      print_class = FALSE,
      output_type = output_type
    )
  )
} # /rtemis::repr.ResamplerConfig


# %% print.ResamplerConfig ----
#' Print ResamplerConfig
#'
#' @description
#' print ResamplerConfig object
#'
#' @param x ResamplerConfig object
#'
#' @author EDG
#' @noRd
method(print, ResamplerConfig) <- function(
  x,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.ResamplerConfig


# %% desc.ResamplerConfig ----
method(desc, ResamplerConfig) <- function(x) {
  switch(
    x@type,
    KFold = paste0(x@n, " independent folds"),
    StratSub = paste0(x@n, " stratified subsamples"),
    StratBoot = paste0(x@n, " stratified bootstraps"),
    Bootstrap = paste0(x@n, " bootstrap resamples"),
    Custom = paste0(x@n, " custom resamples"),
    LOOCV = paste0(x@n, " leave-one-out folds"),
    paste0(x@n, " resamples")
  )
} # /rtemis::desc.ResamplerConfig


# %% KFoldConfig ----
#' @title KFoldConfig
#'
#' @description
#' ResamplerConfig subclass for k-fold resampling.
#'
#' @author EDG
#' @noRd
KFoldConfig <- new_class(
  name = "KFoldConfig",
  parent = ResamplerConfig,
  properties = list(
    stratify_var = class_character | NULL,
    strat_n_bins = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(n, stratify_var, strat_n_bins, id_strat, seed) {
    new_object(
      ResamplerConfig(
        type = "KFold",
        n = n
      ),
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::KFoldConfig


# %% StratSubConfig ----
#' @title StratSubConfig
#'
#' @description
#' ResamplerConfig subclass for stratified subsampling.
#'
#' @author EDG
#' @noRd
StratSubConfig <- new_class(
  name = "StratSubConfig",
  parent = ResamplerConfig,
  properties = list(
    n = scalar_int_pos,
    train_p = scalar_dbl_01excl,
    stratify_var = class_character | NULL,
    strat_n_bins = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(
    n,
    train_p,
    stratify_var,
    strat_n_bins,
    id_strat,
    seed
  ) {
    new_object(
      ResamplerConfig(
        type = "StratSub",
        n = n
      ),
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::StratSubConfig


# %% StratBootConfig ----
#' @title StratBootConfig
#'
#' @description
#' ResamplerConfig subclass for stratified bootstrapping.
#'
#' @author EDG
#' @noRd
StratBootConfig <- new_class(
  name = "StratBootConfig",
  parent = ResamplerConfig,
  properties = list(
    stratify_var = class_character | NULL,
    train_p = scalar_dbl_01excl,
    strat_n_bins = scalar_int_pos,
    target_length = scalar_int_pos,
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(
    n,
    stratify_var,
    train_p,
    strat_n_bins,
    target_length,
    id_strat,
    seed
  ) {
    new_object(
      ResamplerConfig(
        type = "StratBoot",
        n = n
      ),
      stratify_var = stratify_var,
      train_p = train_p,
      strat_n_bins = strat_n_bins,
      target_length = target_length,
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::StratBootConfig


# %% BootstrapConfig ----
#' @title BootstrapConfig
#'
#' @description
#' ResamplerConfig subclass for bootstrap resampling.
#'
#' @author EDG
#' @noRd
BootstrapConfig <- new_class(
  name = "BootstrapConfig",
  parent = ResamplerConfig,
  properties = list(
    id_strat = class_vector | NULL,
    seed = scalar_int_pos
  ),
  constructor = function(n, id_strat, seed) {
    new_object(
      ResamplerConfig(
        type = "Bootstrap",
        n = n
      ),
      id_strat = id_strat,
      seed = seed
    )
  }
) # /rtemis::BootstrapConfig


# %% LOOCVConfig ----
#' @title LOOCVConfig
#'
#' @description
#' ResamplerConfig subclass for leave-one-out cross-validation.
#'
#' @author EDG
#' @noRd
LOOCVConfig <- new_class(
  name = "LOOCVConfig",
  parent = ResamplerConfig,
  constructor = function(n) {
    new_object(
      ResamplerConfig(
        type = "LOOCV",
        n = n
      )
    )
  }
) # /rtemis::LOOCVConfig


# %% CustomConfig ----
#' @title CustomConfig
#'
#' @description
#' ResamplerConfig subclass for custom resampling.
#'
#' @author EDG
#' @noRd
CustomConfig <- new_class(
  name = "CustomConfig",
  parent = ResamplerConfig,
  constructor = function(n) {
    new_object(
      ResamplerConfig(
        type = "Custom",
        n = n
      )
    )
  }
) # /rtemis::CustomConfig


# %% setup_Resampler ----
#' Setup Resampler
#'
#' @param n_resamples Integer: Number of resamples to make.
#' @param type Character: Type of resampler: "KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"
#' @param stratify_var Character: Variable to stratify by.
#' @param train_p Float: Training set percentage.
#' @param strat_n_bins Integer: Number of bins to stratify by.
#' @param target_length Integer: Target length for stratified bootstraps.
#' @param id_strat Integer: Vector of indices to stratify by. These may be, for example, case IDs
#' if your dataset contains repeated measurements. By specifying this vector, you can ensure that
#' each case can only be present in the training or test set, but not both.
#' @param seed Integer: Random seed.
#' @param verbosity Integer: Verbosity level.
#'
#' @return ResamplerConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' tenfold_resampler <- setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L)
#' tenfold_resampler
setup_Resampler <- function(
  n_resamples = 10L,
  type = c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"),
  # index = NULL,
  # group = NULL,
  stratify_var = NULL,
  train_p = .75,
  strat_n_bins = 4L,
  target_length = NULL,
  id_strat = NULL,
  seed = NULL,
  verbosity = 1L
) {
  # Arguments
  type <- match_arg(
    type,
    c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV")
  )
  if (length(type) == 0) {
    cli::cli_abort(
      "Invalid resampler type. Must be one of: 'StratSub', 'StratBoot', 'KFold', 'Bootstrap', 'LOOCV'"
    )
  }
  seed <- clean_int(seed)

  if (type == "KFold") {
    KFoldConfig(
      n = n_resamples,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "StratSub") {
    StratSubConfig(
      n = n_resamples,
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "StratBoot") {
    StratBootConfig(
      n = n_resamples,
      train_p = train_p,
      stratify_var = stratify_var,
      strat_n_bins = strat_n_bins,
      target_length = target_length,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "Bootstrap") {
    BootstrapConfig(
      n = n_resamples,
      id_strat = id_strat,
      seed = seed
    )
  } else if (type == "LOOCV") {
    LOOCVConfig(
      n = NA_integer_
    )
  } else {
    cli::cli_abort(paste(
      "Resampler'",
      type,
      "'is not supported.",
      "Supported types are: 'KFold', 'StratSub', 'StratBoot', 'Bootstrap', 'LOOCV'."
    ))
  }
} # /rtemis::setup_Resampler


# %% Resampler ----
#' @title Resampler
#'
#' @description
#' Class for resampling objects.
#'
#' @author EDG
#' @noRd
Resampler <- new_class(
  name = "Resampler",
  properties = list(
    type = class_character,
    resamples = class_list,
    config = ResamplerConfig
  )
) # /rtemis::Resampler


# %% repr.Resampler ----
#' repr Resampler
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(repr, Resampler) <- function(x, pad = 0L, output_type = NULL) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(x, pad = pad, output_type = output_type),
    repr_ls(
      props(x),
      pad = pad,
      print_class = FALSE,
      output_type = output_type
    )
  )
} # /rtemis::repr.Resampler


# %% print.Resampler ----
method(print, Resampler) <- function(
  x,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  cat(repr(x, output_type = output_type))
  invisible(x)
}

# %% names.Resampler ----
method(names, Resampler) <- function(x) {
  names(x@resamples)
}


# %% `$`.Resampler ----
# Access Resampler$resamples resamples using `$` ----
method(`$`, Resampler) <- function(x, name) {
  x@resamples[[name]]
}


# %% `.DollarNames`.Resampler ----
# DollarSign tab-complete Resampler@resamples names
method(`.DollarNames`, Resampler) <- function(x, pattern = "") {
  all_names <- names(x@resamples)
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.Resampler ----
# Access Resampler$resamples resamples using `[[` ----
method(`[[`, Resampler) <- function(x, index) {
  x@resamples[[index]]
}


# %% desc.Resampler ----
method(desc, Resampler) <- function(x) {
  desc(x@config)
}


# %% --- Internal functions ----

# %% .list_to_ResamplerConfig ----
#' Convert a list to a ResamplerConfig object
#'
#' Internal function used by `rtemis.server` and `SuperConfig` deserialization
#' to reconstruct a `ResamplerConfig` object from a named list. Not intended
#' for direct use by end users.
#'
#' @param x Named list with the following elements:
#'   \describe{
#'     \item{`type`}{Character: resampler type — one of `"KFold"`,
#'       `"StratSub"`, `"StratBoot"`, `"Bootstrap"`, `"LOOCV"`, `"Custom"`.}
#'     \item{`n`}{Integer: number of resamples (not used for `"LOOCV"`).}
#'     \item{`train_p`}{Numeric: training proportion (used by `"StratSub"` and
#'       `"StratBoot"`).}
#'     \item{`stratify_var`}{Character or `NULL`: stratification variable name.}
#'     \item{`strat_n_bins`}{Integer: number of bins for stratification.}
#'     \item{`target_length`}{Integer or `NULL`: target resample length
#'       (`"StratBoot"` only).}
#'     \item{`id_strat`}{Character or `NULL`: ID stratification variable.}
#'     \item{`seed`}{Integer or `NULL`: random seed.}
#'   }
#'
#' @return A `ResamplerConfig` object of the appropriate subtype.
#'
#' @author EDG
#' @keywords internal
#' @export
.list_to_ResamplerConfig <- function(x) {
  switch(
    x[["type"]],
    KFold = KFoldConfig(
      n = x[["n"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    StratSub = StratSubConfig(
      n = x[["n"]],
      train_p = x[["train_p"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    StratBoot = StratBootConfig(
      n = x[["n"]],
      train_p = x[["train_p"]],
      stratify_var = x[["stratify_var"]],
      strat_n_bins = x[["strat_n_bins"]],
      target_length = x[["target_length"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    Bootstrap = BootstrapConfig(
      n = x[["n"]],
      id_strat = x[["id_strat"]],
      seed = x[["seed"]]
    ),
    LOOCV = LOOCVConfig(
      n = NA_integer_
    ),
    Custom = CustomConfig(
      n = x[["n"]]
    )
  )
} # /rtemis::.list_to_ResamplerConfig


================================================
FILE: R/06_Tuner.R
================================================
# S7_Tuner.R
# ::rtemis::
# 2025- EDG rtemis.org

# References
# S7
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7
# future
# https://www.futureverse.org/backends.html

# Description
# `TunerConfig` class and subclasses create objects that store tuner config.
# They are set by `setup_GridSearch()` and perform type checking and validation.
# They are used by `tune()`.
# `Tuner` class and subclasses create objects that store tuning results.
# They are created by `tune()`.

# Dev
# Should both class constructors (e.g. GridSearch@constructor) and setup functions
# (e.g. setup_GridSearch) perform type checking and validation?

# %% TunerConfig ----
#' TunerConfig
#'
#' Superclass for tuner config.
#'
#' @field type Character: Type of tuner.
#' @field config Named list of tuner config.
#'
#' @author EDG
#' @noRd
TunerConfig <- new_class(
  name = "TunerConfig",
  properties = list(
    type = class_character,
    config = class_list
  )
) # /rtemis::TunerConfig


# %% repr.TunerConfig ----
method(repr, TunerConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(
      paste(x@type, "TunerConfig"),
      pad = pad,
      output_type = output_type
    ),
    repr_ls(x@config, pad = pad, output_type = output_type)
  )
} # /rtemis::repr.TunerConfig


# %% print.TunerConfig ----
method(print, TunerConfig) <- function(x, pad = 0L, ...) {
  cat(repr(x, pad = pad), "\n")
  invisible(x)
}

# %% desc.TunerConfig ----
method(desc, TunerConfig) <- function(x) {
  if (x@type == "GridSearch") {
    paste(x@config[["search_type"]], "grid search")
  }
}


# %% `$`.TunerConfig ----
# Make TunerConfig@config `$`-accessible
method(`$`, TunerConfig) <- function(x, name) {
  x@config[[name]]
}


# %% `.DollarNames`.TunerConfig ----
# `$`-autocomplete TunerConfig@config
method(`.DollarNames`, TunerConfig) <- function(x, pattern = "") {
  all_names <- names(x@config)
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.TunerConfig ----
# Make TunerConfig@config `[[`-accessible
method(`[[`, TunerConfig) <- function(x, name) {
  x@config[[name]]
}


# %% GridSearchConfig ----
#' @title GridSearchConfig
#'
#' @description
#' TunerConfig subclass for grid search config.
#'
#' @author EDG
#' @noRd
GridSearchConfig <- new_class(
  name = "GridSearchConfig",
  parent = TunerConfig,
  constructor = function(
    resampler_config = NULL,
    search_type = NULL,
    randomize_p = NULL,
    metrics_aggregate_fn = NULL,
    metric = NULL,
    maximize = NULL
  ) {
    check_is_S7(resampler_config, ResamplerConfig)
    check_inherits(search_type, "character")
    check_float01exc(randomize_p)
    check_character(metrics_aggregate_fn)
    check_inherits(metric, "character")
    check_inherits(maximize, "logical")
    # Only assign randomize_p if search_type is "randomized"
    params <- list(
      search_type = search_type,
      resampler_config = resampler_config,
      metrics_aggregate_fn = metrics_aggregate_fn,
      metric = metric,
      maximize = maximize
    )
    if (search_type == "randomized") {
      params[["randomize_p"]] <- randomize_p
    }
    new_object(
      TunerConfig(
        type = "GridSearch",
        config = params
      )
    )
  }
) # /rtemis::GridSearchConfig


# %% setup_GridSearch ----
#' Setup Grid Search Config
#'
#' Create a `GridSearchConfig` object that can be passed to [train].
#'
#' @param resampler_config `ResamplerConfig` set by [setup_Resampler].
#' @param search_type Character: "exhaustive" or "randomized". Type of
#' grid search to use. Exhaustive search will try all combinations of
#' config. Randomized will try a random sample of size
#' `randomize_p` * `N of total combinations`
#' @param randomize_p Float (0, 1): For `search_type == "randomized"`,
#' randomly test this proportion of combinations.
#' @param metrics_aggregate_fn Character: Name of function to use to aggregate error metrics.
#' @param metric Character: Metric to minimize or maximize.
#' @param maximize Logical: If TRUE, maximize `metric`, otherwise minimize it.
#'
#' @return A `GridSearchConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' gridsearch_config <- setup_GridSearch(
#'   resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"),
#'   search_type = "exhaustive"
#' )
#' gridsearch_config
setup_GridSearch <- function(
  resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"),
  search_type = "exhaustive",
  randomize_p = NULL,
  metrics_aggregate_fn = "mean",
  metric = NULL,
  maximize = NULL
) {
  # Arguments ----
  check_is_S7(resampler_config, ResamplerConfig)
  check_inherits(search_type, "character")
  check_float01exc(randomize_p)
  if (search_type == "exhaustive" && !is.null(randomize_p)) {
    cli::cli_abort("search_type is 'exhaustive': do not set randomize_p.")
  }
  # check_inherits(metrics_aggregate_fn, "function")
  check_character(metrics_aggregate_fn)
  check_inherits(metric, "character")
  check_inherits(maximize, "logical")
  GridSearchConfig(
    resampler_config = resampler_config,
    search_type = search_type,
    randomize_p = randomize_p,
    metrics_aggregate_fn = metrics_aggregate_fn,
    metric = metric,
    maximize = maximize
  )
} # /rtemis::setup_GridSearch


# %% Tuner ----
#' Tuner Class
#'
#' @field type Character: Type of tuner.
#' @field hyperparameters Named list of tunable and fixed hyperparameters.
#' @field tuning_results Data.frame: Tuning results.
#' @field best_hyperparameters Named list of best hyperparameter values. Includes only
#' hyperparameters that were tuned.
#'
#' @author EDG
#' @noRd
Tuner <- new_class(
  name = "Tuner",
  properties = list(
    type = class_character,
    hyperparameters = Hyperparameters,
    tuner_config = TunerConfig,
    tuning_results = class_list, # with 2 elements: metrics_training, metrics_validation
    best_hyperparameters = class_list
  )
) # /rtemis::Tuner


# %% desc.Tuner ----
method(desc, Tuner) <- function(x) {
  if (x@type == "GridSearch") {
    paste(x@tuner_config[["search_type"]], "grid search")
  }
} # /rtemis::describe.Tuner


# %% GridSearch ----
#' GridSearch Class
#'
#' Tuner subclass for grid search.
#'
#' @author EDG
#' @noRd
GridSearch <- new_class(
  name = "GridSearch",
  parent = Tuner,
  constructor = function(
    hyperparameters,
    tuner_config,
    tuning_results,
    best_hyperparameters
  ) {
    type <- "GridSearch"
    new_object(
      Tuner(
        type = type,
        hyperparameters = hyperparameters,
        tuner_config = tuner_config,
        tuning_results = tuning_results,
        best_hyperparameters = best_hyperparameters
      )
    )
  }
) # /rtemis::GridSearch


# print.GridSearch ----
#' Print GridSearch
#'
#' Print GridSearch object
#'
#' @param x GridSearch object.
#' @param header Logical: If TRUE, print header with type of tuner.
#' @param ... Not used.
#'
#' @author EDG
#' @noRd
method(print, GridSearch) <- function(x, header = TRUE, ...) {
  if (header) {
    objcat(paste(x@type))
  }
  type <- if (x@tuner_config[["search_type"]] == "exhaustive") {
    "An exhaustive grid search"
  } else {
    paste0(
      "A randomized grid search (p = ",
      x@tuner_config[["randomize_p"]],
      ")"
    )
  }
  n_param_combs <- NROW(x@tuning_results[["param_grid"]])
  cat(
    type,
    " of ",
    singorplu(n_param_combs, "parameter combination"),
    " was performed using ",
    desc(x@tuner_config[["resampler_config"]]),
    ".\n",
    sep = ""
  )
  cat(
    x@tuner_config[["metric"]],
    "was",
    ifelse(x@tuner_config[["maximize"]], "maximized", "minimized"),
    "with the following config:\n"
  )
  printls(x@best_hyperparameters)
  invisible(x)
} # /rtemis::print.GridSearch


# %% repr.GridSearch ----
method(repr, GridSearch) <- function(
  x,
  header = TRUE,
  pad = 0L,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  output_type <- match.arg(output_type)
  out <- character()
  if (header) {
    out <- paste0(out, repr_S7name(x@type, pad = pad), "\n")
  }
  type <- if (x@tuner_config[["search_type"]] == "exhaustive") {
    "An exhaustive grid search"
  } else {
    paste0(
      "A randomized grid search (p = ",
      x@tuner_config[["randomize_p"]],
      ")"
    )
  }
  n_param_combs <- NROW(x@tuning_results[["param_grid"]])
  out <- paste0(
    out,
    type,
    " of ",
    singorplu(n_param_combs, "parameter combination"),
    " was performed using ",
    desc(x@tuner_config[["resampler_config"]]),
    ".\n"
  )
  out <- paste(
    out,
    x@tuner_config[["metric"]],
    "was",
    ifelse(x@tuner_config[["maximize"]], "maximized", "minimized"),
    "with the following config:\n"
  )
  out <- paste(
    out,
    repr_ls(x@best_hyperparameters, pad = pad, output_type = output_type),
    sep = ""
  )
  out
} # /rtemis::repr.GridSearch


# %% .list_to_TunerConfig ----
#' Convert a list to a TunerConfig object
#'
#' Internal function used by `rtemis.server` and `SuperConfig` deserialization
#' to reconstruct a `TunerConfig` object from a named list. Not intended for
#' direct use by end users.
#'
#' @param x Named list with two elements:
#'   \describe{
#'     \item{`type`}{Character: tuner type. Currently only `"GridSearch"` is
#'       supported.}
#'     \item{`config`}{Named list of tuner configuration fields. For
#'       `"GridSearch"`: `resampler_config` (a list accepted by
#'       [.list_to_ResamplerConfig()]), `search_type`, `randomize_p`,
#'       `metrics_aggregate_fn`, `metric`, and `maximize`.}
#'   }
#'
#' @return A `TunerConfig` object (currently a `GridSearchConfig`).
#'
#' @author EDG
#' @keywords internal
#' @export
.list_to_TunerConfig <- function(x) {
  if (x[["type"]] == "GridSearch") {
    setup_GridSearch(
      resampler_config = .list_to_ResamplerConfig(x[["config"]][[
        "resampler_config"
      ]]),
      search_type = x[["config"]][["search_type"]],
      randomize_p = x[["config"]][["randomize_p"]],
      metrics_aggregate_fn = x[["config"]][["metrics_aggregate_fn"]],
      metric = x[["config"]][["metric"]],
      maximize = x[["config"]][["maximize"]]
    )
  } else {
    cli::cli_abort("Unsupported tuner type: {x[['type']]}")
  }
} # /rtemis::.list_to_TunerConfig


================================================
FILE: R/07_Supervised.R
================================================
# S7_Supervised.R
# ::rtemis::
# 2025- EDG rtemis.org

# References
# https://github.com/RConsortium/S7/
# https://rconsortium.github.io/S7
# https://rconsortium.github.io/S7/articles/classes-objects.html?q=computed#computed-properties
# https://utf8-icons.com/

# %% VariableImportance ----
#' @title VariableImportance
#'
#' @description
#' Class for variable importance objects. Allows for one or more variable importance measures,
#' stored in a data.table with columns "variable", and at least one
#' more column with a descriptive name.
#'
#' @author EDG
#' @noRd
VariableImportance <- new_class(
  name = "VariableImportance",
  properties = list(
    data = class_data.table
  ),
  validator = function(self) {
    # Must include at least two columns
    if (NCOL(self@data) < 2L) {
      cli::cli_abort(
        "Variable importance data must include at least two columns: 'variable' and at least one importance measure."
      )
    }
    # Must include column "variable" of type character
    if (!"variable" %in% names(self@data)) {
      cli::cli_abort(
        "Variable importance data must include a 'variable' column."
      )
    }
    if (!is.character(self@data[["variable"]])) {
      cli::cli_abort("Column 'variable' must be of type character.")
    }
    # All other columns must be numeric
    other_cols <- setdiff(names(self@data), "variable")
    if (!all(self@data[, sapply(.SD, is.numeric), .SDcols = other_cols])) {
      cli::cli_abort(
        "All columns other than 'variable' must be numeric."
      )
    }
    # Number of rows will be checked by Supervised to be at least as many as
    # the number of predictors.
  }
) # /rtemis::VariableImportance


# %% repr.VariableImportance ----
method(repr, VariableImportance) <- function(x, pad = 0L, output_type = NULL) {
  output_type <- get_output_type(output_type)
  # "N variable importance measures for M predictors"
  n_m <- NCOL(x@data) - 1L
  paste0(
    repr_S7name("VariableImportance", pad = pad, output_type = output_type),
    strrep(" ", pad),
    fmt(n_m, col = highlight_col, bold = TRUE, output_type = output_type),
    ngettext(
      n_m,
      " variable importance measure for ",
      " variable importance measures for "
    ),
    fmt(
      NROW(x@data),
      col = highlight_col,
      bold = TRUE,
      output_type = output_type
    ),
    ngettext(NROW(x@data), " predictor", " predictors")
  )
} # /rtemis::repr.VariableImportance


# %% print.VariableImportance ----
method(print, VariableImportance) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type), "\n")
  invisible(x)
} # /rtemis::print.VariableImportance


# Plot methods
# Supervised: plot_varimp
# SupervisedRes: plot_varimp, plot_metric
# Regression: plot_true_pred,
# Classification: plot_true_pred, plot_roc
# RegressionRes: plot_metric, plot_true_pred,
# ClassificationRes: plot_metric, plot_true_pred, plot_roc

# %% Supervised ----
#' @title Supervised
#'
#' @description
#' Superclass for supervised learning models.
#'
#' @author EDG
#' @noRd
Supervised <- new_class(
  name = "Supervised",
  properties = list(
    algorithm = class_character,
    model = class_any,
    type = class_character,
    preprocessor = Preprocessor | NULL,
    preprocessor_internal = Preprocessor | NULL,
    hyperparameters = Hyperparameters | NULL,
    tuner = Tuner | NULL,
    execution_config = ExecutionConfig,
    y_training = class_any,
    y_validation = class_any,
    y_test = class_any,
    predicted_training = class_any,
    predicted_validation = class_any,
    predicted_test = class_any,
    metrics_training = Metrics,
    metrics_validation = Metrics | NULL,
    metrics_test = Metrics | NULL,
    xnames = class_character,
    varimp = VariableImportance | NULL,
    question = class_character | NULL,
    extra = class_any,
    session_info = class_any
  ),
  constructor = function(
    algorithm,
    model,
    type,
    preprocessor,
    preprocessor_internal,
    hyperparameters,
    tuner,
    execution_config,
    y_training,
    y_validation,
    y_test,
    predicted_training,
    predicted_validation,
    predicted_test,
    metrics_training,
    metrics_validation,
    metrics_test,
    xnames,
    varimp,
    question,
    extra
  ) {
    new_object(
      S7_object(),
      algorithm = algorithm,
      model = model,
      type = type,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner = tuner,
      execution_config = execution_config,
      y_training = y_training,
      y_validation = y_validation,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_validation = predicted_validation,
      predicted_test = predicted_test,
      metrics_training = metrics_training,
      metrics_validation = metrics_validation,
      metrics_test = metrics_test,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra,
      session_info = sessionInfo()
    )
  }
) # /rtemis::Supervised


# %% predict.Supervised ----
#' Predict `Supervised`
#'
#' Predict Method for `Supervised` objects
#'
#' @param object `Supervised` object.
#' @param newdata data.frame or similar: New data to predict.
#'
#' @noRd
method(predict, Supervised) <- function(object, newdata, verbosity = 1L, ...) {
  check_inherits(newdata, "data.frame")

  # Apply user-specified preprocessor if available
  if (!is.null(object@preprocessor)) {
    newdata <- preprocess(
      newdata,
      object@preprocessor,
      verbosity = verbosity
    ) |>
      preprocessed()
  }

  # Apply algorithm-specific preprocessor if available
  if (!is.null(object@preprocessor_internal)) {
    newdata <- preprocess(
      newdata,
      object@preprocessor_internal,
      verbosity = verbosity
    ) |>
      preprocessed()
  }

  # After preprocessing, enforce strict predictor names and order
  if (!identical(names(newdata), object@xnames)) {
    extra_cols <- setdiff(names(newdata), object@xnames)
    missing_cols <- setdiff(object@xnames, names(newdata))
    cli::cli_abort(c(
      "x" = "Predictor names and order in newdata must exactly match training data.",
      "i" = "Expected {length(object@xnames)} columns; got {NCOL(newdata)}.",
      "i" = if (length(extra_cols) > 0L) {
        paste0("Unexpected columns: ", paste(extra_cols, collapse = ", "))
      } else {
        "Unexpected columns: none."
      },
      "i" = if (length(missing_cols) > 0L) {
        paste0("Missing columns: ", paste(missing_cols, collapse = ", "))
      } else {
        "Missing columns: none."
      }
    ))
  }

  # Call predict_super with fully preprocessed data
  predict_super(
    model = object@model,
    newdata = newdata,
    type = object@type,
    verbosity = verbosity
  )
} # /rtemis::predict.Supervised


# %% fitted.Supervised ----
#' Fitted `Supervised`
#'
#' Fitted Method for `Supervised` objects
#'
#' @param object `Supervised` object.
#'
#' @keywords internal
#' @noRd
method(fitted, Supervised) <- function(object, ...) {
  object@predicted_training
} # /rtemis::fitted.Supervised


# %% se.Supervised ----
#' Standard Error `Supervised`
#'
#' Standard Error Method for `Supervised` objects
#'
#' @param object `Supervised` object.
#'
#' @keywords internal
#' @noRd
method(se, Supervised) <- function(x, ...) {
  x@se_training
}


# %% `$`.Supervised ----
# Make Supervised props `$`- accessible
method(`$`, Supervised) <- function(x, name) {
  prop(x, name)
}


# %% `.DollarNames`.Supervised ----
# `$`-autocomplete Supervised props
method(`.DollarNames`, Supervised) <- function(x, pattern = "") {
  all_names <- names(props(x))
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.Supervised ----
# Make Supervised props `[[`- accessible ----
method(`[[`, Supervised) <- function(x, name) {
  prop(x, name)
}


# %% repr.Supervised ----
#' repr `Supervised`
#'
#' @param x `Supervised` object.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
method(repr, Supervised) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  filename = NULL
) {
  output_type <- get_output_type(output_type, filename)
  # Class name
  out <- paste0(
    repr_S7name(x@type, pad = pad, output_type = output_type),
    highlight(x@algorithm, output_type = output_type),
    " (",
    desc_alg(x@algorithm),
    ")\n"
  )

  # Tuning, if available
  if (!is.null(x@tuner)) {
    out <- paste0(
      out,
      fmt(
        "\U2699",
        col = col_tuner,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      " Tuned using ",
      desc(x@tuner),
      ".\n"
    )
  }

  # Calibration, if available
  if (prop_exists(x, "calibration_model")) {
    out <- paste0(
      out,
      fmt(
        "\U27CB",
        col = col_calibrator,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      " Calibrated using ",
      desc_alg(x@calibration_model@algorithm),
      ".\n"
    )
  }

  out <- paste0(out, "\n")

  # {Regression, Classification} vs. CalibratedClassification
  if (prop_exists(x, "calibration_model")) {
    # CalibratedClassification
    # Metrics, training
    out <- paste0(
      out,
      # repr(x@metrics_training, pad = 2L, output_type = output_type)
      repr_CalibratedClassificationMetrics(
        x@metrics_training,
        x@metrics_training_calibrated,
        pad = pad + 2L,
        output_type = output_type
      )
    )

    # Metrics, validation
    if (length(x@metrics_validation) > 0) {
      out <- paste0(
        out,
        repr_CalibratedClassificationMetrics(
          x@metrics_validation,
          x@metrics_validation_calibrated,
          pad = pad + 2L,
          output_type = output_type
        )
      )
    }

    # Metrics, test
    if (length(x@metrics_test) > 0) {
      out <- paste0(
        out,
        "\n",
        repr_CalibratedClassificationMetrics(
          x@metrics_test,
          x@metrics_test_calibrated,
          pad = pad + 2L,
          output_type = output_type
        )
      )
    }
  } else {
    # {Regression, Classification}

    # Metrics, training
    out <- paste0(
      out,
      repr(x@metrics_training, pad = pad + 2L, output_type = output_type)
    )

    # Metrics, validation
    if (length(x@metrics_validation) > 0) {
      out <- paste0(
        out,
        repr(x@metrics_validation, pad = pad + 2L, output_type = output_type)
      )
    }

    # Metrics, test
    if (length(x@metrics_test) > 0) {
      out <- paste0(
        out,
        "\n",
        repr(x@metrics_test, pad = pad + 2L, output_type = output_type)
      )
    }
  }

  out
} # /rtemis::repr.Supervised


# %% to_json.Supervised ----
#' to_json `Supervised`
#'
#' Convert a `Supervised` (or `Regression` / `Classification` /
#' `CalibratedClassification`) object to a JSON-serializable list. Excludes
#' the model object, full prediction vectors, full outcome vectors, the
#' R session_info, and `extra` — all of which are either not JSON-friendly,
#' too large for the control-plane response, or fetched separately as
#' Arrow IPC bulk data.
#'
#' @param x `Supervised` object.
#'
#' @return Named list. Pass to `jsonlite::toJSON(auto_unbox = TRUE)`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(to_json, Supervised) <- function(x, ...) {
  # Use `.to_json_value()` for every prop — it handles nested S7 objects,
  # nested lists containing S7 objects, and primitive types uniformly.
  # That matters when a prop's *declared* type is S7 but the actual value
  # is a primitive (e.g. `varimp` is sometimes a plain numeric vector
  # rather than a VariableImportance — a rtemis-internal type mismatch
  # that this method must tolerate).
  out <- list(
    .class = S7_class(x)@name,
    algorithm = x@algorithm,
    type = x@type,
    question = x@question,
    xnames = x@xnames,
    n_features = length(x@xnames),
    preprocessor = .to_json_value(x@preprocessor),
    preprocessor_internal = .to_json_value(x@preprocessor_internal),
    hyperparameters = .to_json_value(x@hyperparameters),
    tuner = .to_json_value(x@tuner),
    execution_config = .to_json_value(x@execution_config),
    metrics_training = .to_json_value(x@metrics_training),
    metrics_validation = .to_json_value(x@metrics_validation),
    metrics_test = .to_json_value(x@metrics_test),
    varimp = .to_json_value(x@varimp)
  )

  # Subclass-specific extras
  if (prop_exists(x, "binclasspos")) {
    out[["binclasspos"]] <- x@binclasspos
  }
  if (prop_exists(x, "se_training")) {
    # Regression: don't serialize full SE vectors (large); flag presence only
    out[["has_se"]] <- !is.null(x@se_training)
  }
  if (prop_exists(x, "calibration_model")) {
    out[["calibration_model"]] <- .to_json_value(x@calibration_model)
    if (prop_exists(x, "metrics_training_calibrated")) {
      out[["metrics_training_calibrated"]] <-
        .to_json_value(x@metrics_training_calibrated)
    }
    if (prop_exists(x, "metrics_validation_calibrated")) {
      out[["metrics_validation_calibrated"]] <-
        .to_json_value(x@metrics_validation_calibrated)
    }
    if (prop_exists(x, "metrics_test_calibrated")) {
      out[["metrics_test_calibrated"]] <-
        .to_json_value(x@metrics_test_calibrated)
    }
  }

  Filter(Negate(is.null), out)
} # /rtemis::to_json.Supervised


# %% print.Supervised ----
method(print, Supervised) <- function(
  x,
  output_type = c("ansi", "html", "plain"),
  ...
) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.Supervised


# %% describe.Supervised ----
#' Describe `Supervised` object
#'
#' @param x `Supervised` object.
#' @param ... Not used.
#'
#' @return Character string describing the `Supervised` object, invisibly.
#'
#' @author EDG
#' @noRd
#'
#' @examples
#' species_lightrf <- train(iris, algorithm = "lightrf")
#' describe(species_lightrf)
method(describe, Supervised) <- function(x) {
  type <- x@type
  algorithm <- desc_alg(x@algorithm)
  cat(algorithm, " was used for ", tolower(type), ".\n", sep = "")
  desc <- paste0(algorithm, " was used for ", tolower(type), ".")

  # Tuning ----
  if (length(x@tuner) > 0) {
    describe(x@tuner)
  }

  # Metrics ----
  if (type == "Classification") {
    cat(
      "Balanced accuracy was",
      ddSci(x@metrics_training[["Overall"]][["Balanced_Accuracy"]]),
      "on the training set"
    )
    desc <- paste(
      desc,
      "Balanced accuracy was",
      ddSci(x@metrics_training[["Overall"]][["Balanced_Accuracy"]]),
      "in the training set"
    )
    if (!is.null(x@metrics_test[["Overall"]][["Balanced_Accuracy"]])) {
      cat(
        " and",
        ddSci(x@metrics_test[["Overall"]][["Balanced_Accuracy"]]),
        "in the test set."
      )
      desc <- paste(
        desc,
        "and",
        ddSci(x@metrics_test[["Overall"]][["Balanced_Accuracy"]]),
        "in the test set."
      )
    } else {
      cat(".")
      desc <- paste0(desc, ".")
    }
  } else if (type == "Regression") {
    cat(
      "R-squared was",
      ddSci(x@metrics_training[["Rsq"]]),
      "in the training set"
    )
    desc <- paste(
      desc,
      "R-squared was",
      ddSci(x@metrics_training[["Rsq"]]),
      "on the training set"
    )
    if (!is.null(x@metrics_test[["Rsq"]])) {
      cat(
        " and",
        ddSci(x@metrics_test[["Rsq"]]),
        "in the test."
      )
      desc <- paste(
        desc,
        "and",
        ddSci(x@metrics_test[["Rsq"]]),
        "on the test set."
      )
    } else {
      cat(".")
      desc <- paste0(desc, ".")
    }
  }
  cat("\n")
  invisible(desc)
} # /rtemis::describe.Supervised


# %% Classification ----
#' @title Classification
#'
#' @description
#' Supervised subclass for classification models.
#'
#' @author EDG
#' @noRd
Classification <- new_class(
  name = "Classification",
  parent = Supervised,
  properties = list(
    predicted_prob_training = class_double | class_data.frame | NULL,
    predicted_prob_validation = class_double | class_data.frame | NULL,
    predicted_prob_test = class_double | class_data.frame | NULL,
    binclasspos = class_integer
  ),
  constructor = function(
    algorithm = NULL,
    model = NULL,
    preprocessor = NULL, # Preprocessor
    preprocessor_internal = NULL, # Algorithm-specific preprocessor
    hyperparameters = NULL, # Hyperparameters
    tuner = NULL, # Tuner
    execution_config,
    y_training = NULL,
    y_validation = NULL,
    y_test = NULL,
    predicted_training = NULL,
    predicted_validation = NULL,
    predicted_test = NULL,
    xnames = NULL,
    varimp = NULL,
    question = NULL,
    extra = NULL,
    predicted_prob_training = NULL,
    predicted_prob_validation = NULL,
    predicted_prob_test = NULL,
    binclasspos = 2L
  ) {
    metrics_training <- classification_metrics(
      true_labels = y_training,
      predicted_labels = predicted_training,
      predicted_prob = predicted_prob_training,
      sample = "Training"
    )
    metrics_validation <- if (!is.null(y_validation)) {
      classification_metrics(
        true_labels = y_validation,
        predicted_labels = predicted_validation,
        predicted_prob = predicted_prob_validation,
        sample = "Validation"
      )
    } else {
      NULL
    }
    metrics_test <- if (!is.null(y_test)) {
      classification_metrics(
        true_labels = y_test,
        predicted_labels = predicted_test,
        predicted_prob = predicted_prob_test,
        sample = "Test"
      )
    } else {
      NULL
    }
    new_object(
      Supervised(
        algorithm = algorithm,
        model = model,
        type = "Classification",
        preprocessor = preprocessor,
        preprocessor_internal = preprocessor_internal,
        hyperparameters = hyperparameters,
        tuner = tuner,
        execution_config = execution_config,
        y_training = y_training,
        y_validation = y_validation,
        y_test = y_test,
        predicted_training = predicted_training,
        predicted_validation = predicted_validation,
        predicted_test = predicted_test,
        metrics_training = metrics_training,
        metrics_validation = metrics_validation,
        metrics_test = metrics_test,
        xnames = xnames,
        varimp = varimp,
        question = question,
        extra = extra
      ),
      predicted_prob_training = predicted_prob_training,
      predicted_prob_validation = predicted_prob_validation,
      predicted_prob_test = predicted_prob_test,
      binclasspos = binclasspos
    )
  }
) # /rtemis::Classification


# %% CalibratedClassification ----
#' @title CalibratedClassification
#'
#' @description
#' Classification subclass for calibrated classification models.
#' The classification_model can be trained on any data, ideally different from any data used by
#' the classification model.
#'
#' @author EDG
#' @noRd
CalibratedClassification <- new_class(
  name = "CalibratedClassification",
  parent = Classification,
  properties = list(
    calibration_model = Supervised,
    predicted_training_calibrated = class_vector,
    predicted_validation_calibrated = class_vector | NULL,
    predicted_test_calibrated = class_vector | NULL,
    predicted_prob_training_calibrated = class_double,
    predicted_prob_validation_calibrated = class_double | NULL,
    predicted_prob_test_calibrated = class_double | NULL,
    metrics_training_calibrated = Metrics,
    metrics_validation_calibrated = Metrics | NULL,
    metrics_test_calibrated = Metrics | NULL
  ),
  constructor = function(classification_model, calibration_model) {
    # Predict calibrated probabilities of classification model datasets
    predicted_prob_training_calibrated <- predict(
      calibration_model,
      data.frame(
        predicted_probabilities = classification_model@predicted_prob_training
      ),
    )
    predicted_prob_validation_calibrated <- if (
      !is.null(classification_model@predicted_prob_validation)
    ) {
      predict(
        calibration_model,
        data.frame(
          predicted_probabilities = classification_model@predicted_prob_validation
        )
      )
    } else {
      NULL
    }
    predicted_prob_test_calibrated <- if (
      !is.null(classification_model@predicted_prob_test)
    ) {
      predict(
        calibration_model,
        data.frame(
          predicted_probabilities = classification_model@predicted_prob_test
        )
      )
    } else {
      NULL
    }
    # Predict calibrated labels of classification model datasets
    predicted_training_calibrated <- prob2categorical(
      predicted_prob_training_calibrated,
      levels = levels(classification_model@y_training)
    )
    predicted_validation_calibrated <- if (
      !is.null(classification_model@predicted_prob_validation)
    ) {
      prob2categorical(
        predicted_prob_validation_calibrated,
        levels = levels(classification_model@y_validation)
      )
    } else {
      NULL
    }
    predicted_test_calibrated <- if (
      !is.null(classification_model@predicted_prob_test)
    ) {
      prob2categorical(
        predicted_prob_test_calibrated,
        levels = levels(classification_model@y_test)
      )
    } else {
      NULL
    }
    metrics_training_calibrated <- classification_metrics(
      true_labels = classification_model@y_training,
      predicted_labels = predicted_training_calibrated,
      predicted_prob = predicted_prob_training_calibrated,
      sample = "Calibrated Training"
    )
    metrics_validation_calibrated <- if (
      !is.null(classification_model@y_validation)
    ) {
      classification_metrics(
        true_labels = classification_model@y_validation,
        predicted_labels = predicted_validation_calibrated,
        predicted_prob = predicted_prob_validation_calibrated,
        sample = "Calibrated Validation"
      )
    } else {
      NULL
    }
    metrics_test_calibrated <- if (!is.null(classification_model@y_test)) {
      classification_metrics(
        true_labels = classification_model@y_test,
        predicted_labels = predicted_test_calibrated,
        predicted_prob = predicted_prob_test_calibrated,
        sample = "Calibrated Test"
      )
    } else {
      NULL
    }
    new_object(
      classification_model,
      calibration_model = calibration_model,
      predicted_training_calibrated = predicted_training_calibrated,
      predicted_validation_calibrated = predicted_validation_calibrated,
      predicted_test_calibrated = predicted_test_calibrated,
      predicted_prob_training_calibrated = predicted_prob_training_calibrated,
      predicted_prob_validation_calibrated = predicted_prob_validation_calibrated,
      predicted_prob_test_calibrated = predicted_prob_test_calibrated,
      metrics_training_calibrated = metrics_training_calibrated,
      metrics_validation_calibrated = metrics_validation_calibrated,
      metrics_test_calibrated = metrics_test_calibrated
    )
  }
) # /rtemis::CalibratedClassification


# %% predict.CalibratedClassification ----
method(predict, CalibratedClassification) <- function(object, newdata, ...) {
  check_inherits(newdata, "data.frame")
  # Get the classification model's predicted probabilities
  raw_prob <- do_call(
    predict_super,
    list(model = object@model, newdata = newdata, type = "Classification")
  )
  # Get the calibration model's predicted probabilities
  predict(
    object@calibration_model,
    newdata = data.frame(predicted_probabilities = raw_prob)
  )
} # /rtemis::predict.CalibratedClassification

se_compat_algorithms <- c("GLM", "GAM")


# %% Regression ----
#' @title Regression
#' @description
#' Supervised subclass for regression models.
#'
#' @author EDG
#' @noRd
Regression <- new_class(
  name = "Regression",
  parent = Supervised,
  properties = list(
    se_training = class_double | NULL,
    se_validation = class_double | NULL,
    se_test = class_double | NULL
  ),
  constructor = function(
    algorithm = NULL,
    model = NULL,
    preprocessor = NULL, # Preprocessor
    preprocessor_internal = NULL, # Algorithm-specific preprocessor
    hyperparameters = NULL, # Hyperparameters
    tuner = NULL, # Tuner
    execution_config, # ExecutionConfig
    y_training = NULL,
    y_validation = NULL,
    y_test = NULL,
    predicted_training = NULL,
    predicted_validation = NULL,
    predicted_test = NULL,
    se_training = NULL,
    se_validation = NULL,
    se_test = NULL,
    xnames = NULL,
    varimp = NULL,
    question = NULL,
    extra = NULL
  ) {
    # Metrics ----
    metrics_training <- regression_metrics(
      y_training,
      predicted_training,
      sample = "Training"
    )
    metrics_validation <- if (!is.null(y_validation)) {
      regression_metrics(
        y_validation,
        predicted_validation,
        sample = "Validation"
      )
    } else {
      NULL
    }
    metrics_test <- if (!is.null(y_test)) {
      regression_metrics(
        y_test,
        predicted_test,
        sample = "Test"
      )
    } else {
      NULL
    }
    new_object(
      Supervised(
        algorithm = algorithm,
        model = model,
        type = "Regression",
        preprocessor = preprocessor,
        preprocessor_internal = preprocessor_internal,
        hyperparameters = hyperparameters,
        tuner = tuner,
        execution_config = execution_config,
        y_training = y_training,
        y_validation = y_validation,
        y_test = y_test,
        predicted_training = predicted_training,
        predicted_validation = predicted_validation,
        predicted_test = predicted_test,
        metrics_training = metrics_training,
        metrics_validation = metrics_validation,
        metrics_test = metrics_test,
        xnames = xnames,
        varimp = varimp,
        question = question,
        extra = extra
      ),
      se_training = se_training,
      se_validation = se_validation,
      se_test = se_test
    )
  }
) # /rtemis::Regression


# %% plot_true_pred.Regression ----
#' Plot True vs. Predicted for Regression
#'
#' @param x `Regression` object.
#' @param what Character vector: What to plot. Can include "training", "validation", "test", or
#' "all", which will plot all available.
#' @param fit Character: Algorithm to use to draw fit line.
#' @param theme `Theme` object.
#' @param labelify Logical: If TRUE, labelify the axis labels.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_true_pred, Regression) <- function(
  x,
  what = "all",
  fit = "glm",
  theme = choose_theme(getOption("rtemis_theme")),
  labelify = TRUE,
  ...
) {
  if (length(what) == 1 && what == "all") {
    what <- c("training", "validation", "test")
  }
  true <- paste0("y_", what)
  true_l <- Filter(
    Negate(is.null),
    sapply(true, function(z) prop(x, z))
  )
  predicted <- paste0("predicted_", what)
  predicted_l <- Filter(
    Negate(is.null),
    sapply(predicted, function(z) prop(x, z))
  )
  if (labelify) {
    names(predicted_l) <- labelify(names(predicted_l))
  }
  draw_fit(
    x = true_l,
    y = predicted_l,
    fit = fit,
    theme = theme,
    ...
  )
} # /rtemis::plot_true_pred.Regression


# %% plot_true_pred.Classification ----
#' Plot True vs. Predicted for Classification
#'
#' @param x `Classification` object.
#' @param what Character vector: What to plot. "training", "validation", "test"
#' @param xlab Optional Character: x axis label. If NULL, will be generated automatically.
#' @param theme `Theme` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_true_pred, Classification) <- function(
  x,
  what = NULL,
  xlab = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  ...
) {
  if (is.null(what)) {
    if (!is.null(x@metrics_test)) {
      what <- "test"
    } else if (!is.null(x@metrics_validation)) {
      what <- "validation"
    } else {
      what <- "training"
    }
  }
  .confmat <- if (what == "training") {
    x@metrics_training
  } else if (what == "validation") {
    x@metrics_validation
  } else if (what == "test") {
    x@metrics_test
  }
  if (is.null(xlab)) {
    xlab <- labelify(paste("Predicted", what))
  }
  draw_confusion(
    .confmat,
    theme = theme,
    xlab = xlab,
    ...
  )
} # /rtemis::plot_true_pred.Classification


# %% plot_roc.Classification ----
method(plot_roc, Classification) <- function(
  x,
  what = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  filename = NULL,
  ...
) {
  if (is.null(x@predicted_prob_training)) {
    msg(highlight2("No predicted probabilities available."))
    return(invisible())
  }
  if (is.null(what)) {
    what <- if (!is.null(x@metrics_test)) {
      c("training", "test")
    } else {
      "training"
    }
  }
  labelsl <- probl <- list()

  if ("training" %in% what) {
    labelsl[["Training"]] <- x@y_training
    probl[["Training"]] <- x@predicted_prob_training
  }
  if ("test" %in% what && !is.null(x@predicted_prob_test)) {
    labelsl[["Test"]] <- x@y_test
    probl[["Test"]] <- x@predicted_prob_test
  }

  draw_roc(
    true_labels = labelsl,
    predicted_prob = probl,
    theme = theme,
    palette = palette,
    legend_title = "Sample (AUC)",
    filename = filename,
    ...
  )
} # /rtemis::plot_ROC.Classification


# %% make_Supervised ----
make_Supervised <- function(
  algorithm = NULL,
  model = NULL,
  preprocessor = NULL,
  preprocessor_internal = NULL,
  hyperparameters = NULL,
  tuner = NULL,
  execution_config,
  y_training = NULL,
  y_validation = NULL,
  y_test = NULL,
  predicted_training = NULL,
  predicted_validation = NULL,
  predicted_test = NULL,
  predicted_prob_training = NULL,
  predicted_prob_validation = NULL,
  predicted_prob_test = NULL,
  se_training = NULL,
  se_validation = NULL,
  se_test = NULL,
  xnames = character(),
  varimp = NULL,
  question = character(),
  extra = NULL,
  binclasspos = 2L
) {
  # Supervised ----
  if (is.factor(y_training)) {
    Classification(
      algorithm = algorithm,
      model = model,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner = tuner,
      execution_config = execution_config,
      y_training = y_training,
      y_validation = y_validation,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_validation = predicted_validation,
      predicted_test = predicted_test,
      predicted_prob_training = predicted_prob_training,
      predicted_prob_validation = predicted_prob_validation,
      predicted_prob_test = predicted_prob_test,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra,
      binclasspos = binclasspos
    )
  } else {
    Regression(
      algorithm = algorithm,
      model = model,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner = tuner,
      execution_config = execution_config,
      y_training = y_training,
      y_validation = y_validation,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_validation = predicted_validation,
      predicted_test = predicted_test,
      se_training = se_training,
      se_validation = se_validation,
      se_test = se_test,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra
    )
  }
} # /rtemis::make_Supervised


# %% write_Supervised ----
write_Supervised <- function(
  object,
  outdir = NULL,
  save_mod = FALSE,
  theme = choose_theme(getOption("rtemis_theme")),
  verbosity = 1L
) {
  if (verbosity > 0L) {
    print(object)
  }

  if (save_mod) {
    rt_save(object, outdir, verbosity = verbosity)
  }
} # /rtemis::write_Supervised


# %% present.Regression ----
# present method for Regression objects
# Plot training + test metrics, if available, side by side using `plotly::subplot()`
# & run `describe()` on the object
method(present, Regression) <- function(
  x,
  what = c("training", "test"),
  theme = choose_theme(getOption("rtemis_theme")),
  filename = NULL,
  ...
) {
  # Describe the model
  describe(x)
  # Plot True vs. Predicted
  plot_true_pred(
    x,
    what = what,
    theme = theme,
    filename = filename,
    ...
  )
} # /rtemis::present.Regression


# %% present.Classification ----
# present method for Classification objects
# Plot training + test metrics if available, side by side
method(present, Classification) <- function(
  x,
  what = c("training", "test"),
  type = c("ROC", "confusion"),
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  filename = NULL,
  ...
) {
  type <- match.arg(type)

  # Describe the model
  describe(x)

  if (type == "ROC") {
    plot_roc(
      x,
      what = what,
      theme = theme,
      palette = palette,
      filename = filename
    )
  } else if (type == "confusion") {
    # Training set plot
    if ("training" %in% what) {
      plot_training <- plot_true_pred(
        x,
        what = "training",
        theme = theme,
        xlab = "Predicted Training"
      )
    } else {
      plot_training <- NULL
    }
    # Test set plot
    if ("test" %in% what && !is.null(x@y_test)) {
      plot_test <- plot_true_pred(
        x,
        what = "test",
        theme = theme,
        xlab = "Predicted Test"
      )
    } else {
      plot_test <- NULL
    }

    # Combined plot
    # classification: confusion matrices side by side
    plotly::subplot(
      plot_training,
      plot_test,
      nrows = 1L,
      shareX = FALSE,
      shareY = FALSE,
      titleX = TRUE,
      titleY = TRUE,
      margin = 0.01
    )
  }
} # /rtemis::present.Classification


# %% SupervisedRes ----
# fields metrics_training/metrics_validation/metrics_test
# could be active bindings that are collected from @models
#' SupervisedRes
#'
#' @description
#' Superclass for Resampled supervised learning models.
#'
#' @author EDG
#' @noRd
SupervisedRes <- new_class(
  name = "SupervisedRes",
  properties = list(
    algorithm = class_character,
    models = class_list,
    type = class_character,
    preprocessor = Preprocessor | NULL,
    preprocessor_internal = Preprocessor | NULL,
    hyperparameters = Hyperparameters | NULL,
    tuner_config = TunerConfig | NULL,
    outer_resampler = Resampler,
    execution_config = ExecutionConfig,
    y_training = class_any,
    y_test = class_any,
    predicted_training = class_any,
    predicted_test = class_any,
    metrics_training = MetricsRes,
    metrics_test = MetricsRes,
    xnames = class_character,
    varimp = class_list | NULL,
    question = class_character | NULL,
    extra = class_any,
    session_info = class_any
  ),
  constructor = function(
    algorithm,
    models,
    type,
    preprocessor,
    preprocessor_internal,
    hyperparameters,
    tuner_config,
    outer_resampler,
    execution_config,
    y_training,
    y_test,
    predicted_training,
    predicted_test,
    metrics_training,
    metrics_test,
    metrics_training_mean,
    metrics_test_mean,
    xnames,
    varimp,
    question,
    extra
  ) {
    new_object(
      S7::S7_object(),
      algorithm = algorithm,
      models = models,
      type = models[[1]]@type,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner_config = tuner_config,
      outer_resampler = outer_resampler,
      execution_config = execution_config,
      y_training = y_training,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_test = predicted_test,
      metrics_training = metrics_training,
      metrics_test = metrics_test,
      # metrics_training_mean = metrics_training_mean,
      # metrics_test_mean = metrics_test_mean,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra,
      session_info = sessionInfo()
    )
  }
) # /rtemis::SupervisedRes


# %% repr.SupervisedRes ----
#' repr `SupervisedRes`
#'
#' @param x `SupervisedRes` object.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#' @param filename Character: Filename to save output to (not used).
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
method(repr, SupervisedRes) <- function(
  x,
  output_type = NULL,
  filename = NULL
) {
  output_type <- get_output_type(output_type, filename)

  # Class name + Alg name (2 lines)
  out <- paste0(
    repr_S7name(paste("Resampled", x@type, "Model"), output_type = output_type),
    highlight(x@algorithm, output_type = output_type),
    " (",
    desc_alg(x@algorithm),
    ")\n"
  )

  # Tuning, if available (1 line)
  if (!is.null(x@tuner_config)) {
    out <- paste0(
      out,
      fmt("\U2699", col = col_tuner, bold = TRUE, output_type = output_type),
      " Tuned using ",
      desc(x@tuner_config),
      ".\n"
    )
  }

  # Outer resampling (1 line)
  out <- paste0(
    out,
    fmt("\U27F3", col = col_outer, bold = TRUE, output_type = output_type),
    " Tested using ",
    desc(x@outer_resampler),
    ".\n"
  )

  # Calibration, if available
  if (prop_exists(x, "calibration_models")) {
    out <- paste0(
      out,
      fmt(
        "\U27CB",
        col = col_calibrator,
        bold = TRUE,
        output_type = output_type
      ),
      " Calibrated using ",
      desc_alg(x@calibration_models[[1]]@algorithm),
      " with ",
      desc(x@calibration_models[[1]]@outer_resampler@config),
      ".\n"
    )
  }

  out <- paste0(out, "\n")

  # Metrics, training
  if (prop_exists(x, "calibration_models")) {
    out <- paste0(
      out,
      repr_CalibratedClassificationResMetrics(
        x@metrics_training,
        x@metrics_training_calibrated,
        pad = 2L,
        output_type = output_type
      )
    )
  } else {
    out <- paste0(
      out,
      repr(x@metrics_training, pad = 2L, output_type = output_type)
    )
  }

  # Metrics, test
  if (prop_exists(x, "calibration_models")) {
    out <- paste0(
      out,
      "\n",
      repr_CalibratedClassificationResMetrics(
        x@metrics_test,
        x@metrics_test_calibrated,
        pad = 2L,
        output_type = output_type
      )
    )
  } else {
    out <- paste0(
      out,
      "\n",
      repr(x@metrics_test, pad = 2L, output_type = output_type)
    )
  }

  out
} # /rtemis::repr.SupervisedRes


# %% to_json.SupervisedRes ----
#' to_json `SupervisedRes`
#'
#' Convert a `SupervisedRes` (or `RegressionRes` / `ClassificationRes`)
#' object to a JSON-serializable list. The list of per-resample fitted
#' models (`@models`) is summarised by length only — individual model
#' details remain available on the server and can be fetched via
#' separate `job.result` requests if needed.
#'
#' @param x `SupervisedRes` object.
#'
#' @return Named list. Pass to `jsonlite::toJSON(auto_unbox = TRUE)`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(to_json, SupervisedRes) <- function(x, ...) {
  out <- list(
    .class = S7_class(x)@name,
    algorithm = x@algorithm,
    type = x@type,
    question = x@question,
    xnames = x@xnames,
    n_features = length(x@xnames),
    n_resamples = length(x@models),
    preprocessor = .to_json_value(x@preprocessor),
    preprocessor_internal = .to_json_value(x@preprocessor_internal),
    hyperparameters = .to_json_value(x@hyperparameters),
    tuner_config = .to_json_value(x@tuner_config),
    outer_resampler = .to_json_value(x@outer_resampler),
    execution_config = .to_json_value(x@execution_config),
    metrics_training = .to_json_value(x@metrics_training),
    metrics_test = .to_json_value(x@metrics_test),
    # varimp is `class_list | NULL` of VariableImportance.
    # `.to_json_value` recurses through lists, dispatching `to_json` on
    # S7 elements and passing through anything else.
    varimp_per_resample = .to_json_value(x@varimp)
  )

  Filter(Negate(is.null), out)
} # /rtemis::to_json.SupervisedRes


# %% print.SupervisedRes ----
method(print, SupervisedRes) <- function(
  x,
  output_type = NULL,
  ...
) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.SupervisedRes


# %% predict.SupervisedRes ----
#' Predict SupervisedRes
#'
#' Predict Method for SupervisedRes objects
#'
#' @param object `SupervisedRes` object.
#' @param newdata data.frame or similar: New data to predict.
#' @param type Character: Type of prediction to output: "avg" applies `avg_fn` (default "mean") to
#' the predictions of individual models, "all" returns the predictions of all models in a
#' data.frame. "metrics" returns a list of data.frames with a) predictions from each model, b)
#' the mean of the predictions, and c) the standard deviation of the predictions.
#' @param ... Not used.
#'
#' @keywords internal
#' @noRd
method(predict, SupervisedRes) <- function(
  object,
  newdata,
  type = c("avg", "all", "metrics"),
  avg_fn = "mean",
  ...
) {
  check_inherits(newdata, "data.frame")
  type <- match.arg(type)

  predicted <- sapply(
    object@models,
    function(mod) {
      predict(mod, newdata = newdata)
    }
  ) # -> data.frame n cases x n resamples

  if (type == "all") {
    return(predicted)
  } else if (type == "avg") {
    return(apply(predicted, 1, avg_fn))
  } else if (type == "metrics") {
    mean_predictions <- apply(predicted, 2, mean)
    sd_predictions <- apply(predicted, 2, sd)
    return(list(
      predictions = predicted,
      mean = mean_predictions,
      sd = sd_predictions
    ))
  }
} # /rtemis::predict.SupervisedRes


# %% ClassificationRes ----
#' @title ClassificationRes
#'
#' @description
#' SupervisedRes subclass for Resampled classification models.
#'
#' @author EDG
#' @noRd
ClassificationRes <- new_class(
  name = "ClassificationRes",
  parent = SupervisedRes,
  properties = list(
    predicted_prob_training = class_any,
    predicted_prob_test = class_any
  ),
  constructor = function(
    algorithm,
    models,
    preprocessor,
    preprocessor_internal = NULL,
    hyperparameters,
    tuner_config,
    outer_resampler,
    execution_config,
    y_training,
    y_validation = NULL,
    y_test = NULL,
    predicted_training = NULL,
    predicted_test = NULL,
    predicted_prob_training = NULL,
    predicted_prob_test = NULL,
    xnames = NULL,
    varimp = NULL,
    question = NULL,
    extra = NULL
  ) {
    metrics_training <- ClassificationMetricsRes(
      sample = "Training",
      res_metrics = lapply(models, function(mod) mod@metrics_training)
    )
    metrics_test <- ClassificationMetricsRes(
      sample = "Test",
      res_metrics = lapply(models, function(mod) mod@metrics_test)
    )
    new_object(
      SupervisedRes(
        algorithm = algorithm,
        models = models,
        type = "Classification",
        preprocessor = preprocessor,
        preprocessor_internal = preprocessor_internal,
        hyperparameters = hyperparameters,
        tuner_config = tuner_config,
        outer_resampler = outer_resampler,
        execution_config = execution_config,
        y_training = y_training,
        y_test = y_test,
        predicted_training = predicted_training,
        predicted_test = predicted_test,
        metrics_training = metrics_training,
        metrics_test = metrics_test,
        # metrics_training_mean = metrics_training_mean,
        # metrics_test_mean = metrics_test_mean,
        xnames = xnames,
        varimp = varimp,
        question = question,
        extra = extra
      ),
      predicted_prob_training = predicted_prob_training,
      predicted_prob_test = predicted_prob_test
    )
  }
) # /rtemis::ClassificationRes


# %% CalibratedClassificationRes ----
#' @title CalibratedClassificationRes
#'
#' @description
#' ClassificationRes subclass for calibrated classification models.
#' The calibration models are trained on resamples of the `ClassificationRes`'s test data.
#'
#' @author EDG
#' @noRd
# We use getter functions to avoid duplicating data
CalibratedClassificationRes <- new_class(
  name = "CalibratedClassificationRes",
  parent = ClassificationRes,
  properties = list(
    calibration_models = class_list,
    predicted_training_calibrated = new_property(
      getter = function(self) {
        lapply(self@calibration_models, function(mod) {
          mod@predicted_training
        })
      }
    ),
    predicted_test_calibrated = new_property(
      getter = function(self) {
        lapply(self@calibration_models, function(mod) {
          mod@predicted_test
        })
      }
    ),
    predicted_prob_training_calibrated = new_property(
      getter = function(self) {
        lapply(self@calibration_models, function(mod) {
          mod@predicted_prob_training
        })
      }
    ),
    predicted_prob_test_calibrated = new_property(
      getter = function(self) {
        lapply(self@calibration_models, function(mod) {
          mod@predicted_prob_test
        })
      }
    ),
    metrics_training_calibrated = ClassificationMetricsRes,
    metrics_test_calibrated = ClassificationMetricsRes
  ),
  constructor = function(ClassificationRes_model, calibrations_models) {
    # Aggregate calibrated metrics from individual models within each calibration resample
    # calibrations_models is a list of *Res objects, each containing multiple models
    # We need to extract all individual model metrics and aggregate them
    all_training_metrics <- unlist(
      lapply(calibrations_models, function(calmod) {
        calmod@metrics_training@res_metrics
      }),
      recursive = FALSE
    )
    all_test_metrics <- unlist(
      lapply(calibrations_models, function(calmod) {
        calmod@metrics_test@res_metrics
      }),
      recursive = FALSE
    )

    metrics_training_calibrated <- ClassificationMetricsRes(
      sample = "Training",
      res_metrics = all_training_metrics
    )
    metrics_test_calibrated <- ClassificationMetricsRes(
      sample = "Test",
      res_metrics = all_test_metrics
    )

    new_object(
      ClassificationRes_model,
      calibration_models = calibrations_models,
      metrics_training_calibrated = metrics_training_calibrated,
      metrics_test_calibrated = metrics_test_calibrated
    )
  }
) # /rtemis::CalibratedClassificationRes


# %% predict.CalibratedClassificationRes ----
method(predict, CalibratedClassificationRes) <- function(
  object,
  newdata,
  what = c("avg", "all", "metrics"),
  avg_fn = "mean",
  ...
) {
  check_inherits(newdata, "data.frame")
  what <- match.arg(what)

  # Check lengths match
  if (length(object@models) != length(object@calibration_models)) {
    cli::cli_abort("Number of base models and calibration models must match.")
  }

  predicted <- mapply(
    function(base_mod, cal_mod) {
      # 1. Predict with base model
      raw_prob <- predict(
        base_mod,
        newdata = newdata
      )

      # 2. Predict with calibration model
      predict(
        cal_mod,
        newdata = data.frame(predicted_probabilities = raw_prob)
      )
    },
    object@models,
    object@calibration_models,
    SIMPLIFY = TRUE
  ) # -> matrix n cases x n resamples

  if (what == "all") {
    return(predicted)
  } else if (what == "avg") {
    return(apply(predicted, 1, avg_fn))
  } else if (what == "metrics") {
    mean_predictions <- apply(predicted, 2, mean)
    sd_predictions <- apply(predicted, 2, sd)
    # Return both aggregated prediction metrics (per resample)
    # Keeping consistent with SupervisedRes
    return(list(
      predictions = predicted,
      mean = mean_predictions,
      sd = sd_predictions
    ))
  }
} # /rtemis::predict.CalibratedClassificationRes


# %% RegressionRes ----
#' @title RegressionRes
#'
#' @description
#' SupervisedRes subclass for Resampled regression models.
#'
#' @author EDG
#' @noRd
RegressionRes <- new_class(
  name = "RegressionRes",
  parent = SupervisedRes,
  properties = list(
    se_training = class_any,
    se_validation = class_any,
    se_test = class_any
  ),
  constructor = function(
    algorithm,
    models,
    preprocessor,
    preprocessor_internal,
    hyperparameters,
    tuner_config,
    outer_resampler,
    execution_config,
    y_training,
    y_validation = NULL,
    y_test = NULL,
    predicted_training = NULL,
    predicted_test = NULL,
    se_training = NULL,
    se_test = NULL,
    xnames = NULL,
    varimp = NULL,
    question = NULL,
    extra = NULL
  ) {
    metrics_training <- lapply(
      models,
      function(mod) mod@metrics_training@metrics
    )
    metrics_test <- lapply(models, function(mod) mod@metrics_test@metrics)
    metrics_training <- RegressionMetricsRes(
      sample = "Training",
      res_metrics = lapply(models, function(mod) mod@metrics_training)
    )
    metrics_test <- RegressionMetricsRes(
      sample = "Test",
      res_metrics = lapply(models, function(mod) mod@metrics_test)
    )
    new_object(
      SupervisedRes(
        algorithm = algorithm,
        models = models,
        type = "Regression",
        preprocessor = preprocessor,
        preprocessor_internal = preprocessor_internal,
        hyperparameters = hyperparameters,
        tuner_config = tuner_config,
        outer_resampler = outer_resampler,
        execution_config = execution_config,
        y_training = y_training,
        y_test = y_test,
        predicted_training = predicted_training,
        predicted_test = predicted_test,
        metrics_training = metrics_training,
        metrics_test = metrics_test,
        # metrics_training_mean = metrics_training_mean,
        # metrics_test_mean = metrics_test_mean,
        xnames = xnames,
        varimp = varimp,
        question = question,
        extra = extra
      ),
      se_training = se_training,
      se_test = se_test
    )
  }
) # /rtemis::RegressionRes


# %% desc.SupervisedRes ----
method(desc, SupervisedRes) <- function(x, metric = NULL) {
  type <- x@type
  algorithm <- desc_alg(x@algorithm)
  # cat(algorithm, " was used for ", tolower(type), ".\n", sep = "")
  out <- paste0(algorithm, " was used for ", tolower(type), ".")

  # Tuning ----
  if (length(x@tuner_config) > 0) {
    out <- paste0(
      out,
      " Hyperparameter tuning was performed using ",
      desc(x@tuner_config),
      "."
    )
  }

  # Metrics ----
  if (type == "Classification") {
    if (is.null(metric)) {
      metric <- "Balanced_Accuracy"
    }
    out <- paste(
      out,
      "Mean",
      labelify(metric, toLower = TRUE),
      "was",
      ddSci(x@metrics_training@mean_metrics[["Balanced_Accuracy"]]),
      "in the training set and",
      ddSci(x@metrics_test@mean_metrics[["Balanced_Accuracy"]]),
      "in the test set across "
    )
  } else if (type == "Regression") {
    out <- paste(
      out,
      "Mean R-squared was",
      ddSci(x@metrics_training@mean_metrics[["Rsq"]]),
      "on the training set and",
      ddSci(x@metrics_test@mean_metrics[["Rsq"]]),
      "on the test set across "
    )
  }
  out <- paste0(out, desc(x@outer_resampler), ".")
  invisible(out)
} # /rtemis::desc.SupervisedRes


# %% describe.SupervisedRes ----
#' Describe `SupervisedRes`
#'
#' @param x `SupervisedRes` object.
#' @param ... Not used.
#'
#' @return Character string describing the `SupervisedRes` object, invisibly.
#'
#' @author EDG
#' @noRd
#'
#' @examples
#' mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler())
#' describe(mod)
method(describe, SupervisedRes) <- function(x, ...) {
  cat(desc(x), "\n")
}


# %% present.SupervisedRes ----
method(present, SupervisedRes) <- function(
  x,
  theme = choose_theme(getOption("rtemis_theme")),
  ...
) {
  # Describe the model
  describe(x)
  # Plot the performance metrics
  plot_metric(x, what = c("training", "test"), theme = theme, ...)
} # /rtemis::present.SupervisedRes


# %% plot_true_pred.RegressionRes ----
# Plot true vs. predicted aggregated across resamples for either training, test, or both.
#' Plot True vs. Predicted for RegressionRes
#'
#' @param x `RegressionRes` object.
#' @param what Character vector: "all", "training", "test". Which set(s) to plot.
#' @param fit Character: Algorithm to use to draw fit line.
#' @param theme `Theme` object.
#' @param labelify Logical: If TRUE, labelify the axis labels.
#' @param ... Additional arguments passed to [draw_fit].
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_true_pred, RegressionRes) <- function(
  x,
  what = "all",
  fit = "glm",
  theme = choose_theme(getOption("rtemis_theme")),
  labelify = TRUE,
  ...
) {
  if (length(what) == 1 && what == "all") {
    what <- c("training", "test")
  }
  true <- paste0("y_", what)
  true_l <- sapply(true, function(z) {
    unlist(prop(x, z), use.names = FALSE)
  })

  predicted <- paste0("predicted_", what)
  predicted_l <- sapply(predicted, function(z) {
    unlist(prop(x, z), use.names = FALSE)
  })
  if (labelify) {
    names(predicted_l) <- labelify(names(predicted_l))
  }
  draw_fit(
    x = true_l,
    y = predicted_l,
    fit = fit,
    theme = theme,
    ...
  )
} # /rtemis::plot_true_pred.RegressionRes


# %% plot_true_pred.ClassificationRes ----
# Cannot be combined with plot_true_pred.RegressionRes
# because scatter can overplot train & test, but confusion matrices must be subplots.
#' Plot True vs. Predicted for ClassificationRes
#'
#' @param x `ClassificationRes` object.
#' @param what Character vector: "all", "training", "test". Which set(s) to plot.
#' @param theme `Theme` object.
#' @param ... Additional arguments passed to [draw_confusion].
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_true_pred, ClassificationRes) <- function(
  x,
  what = "all",
  theme = choose_theme(getOption("rtemis_theme")),
  ...
) {
  if (length(what) == 1 && what == "all") {
    what <- c("training", "test")
  }
  true <- paste0("y_", what)
  true_l <- sapply(true, function(z) {
    unlist(prop(x, z), use.names = FALSE)
  })

  predicted <- paste0("predicted_", what)
  predicted_l <- sapply(predicted, function(z) {
    unlist(prop(x, z), use.names = FALSE)
  })
  # if (labelify) {
  #   names(predicted_l) <- labelify(names(predicted_l))
  # }
  # => Do not pass filename to both training & testing, latter will overwrite; pass to subplot if
  # plotting both
  # Training
  if ("training" %in% what) {
    plt_training <- draw_confusion(
      table(true_l[["y_training"]], predicted_l[["predicted_training"]]),
      xlab = "Predicted Training",
      theme = theme,
      ...
    )
  }
  if ("test" %in% what) {
    plt_test <- draw_confusion(
      table(true_l[["y_test"]], predicted_l[["predicted_test"]]),
      xlab = "Predicted Test",
      theme = theme,
      ...
    )
  }

  if (length(what) == 1) {
    if (what == "training") {
      return(plt_training)
    } else {
      return(plt_test)
    }
  } else {
    return(plotly::subplot(
      plt_training,
      plt_test,
      nrows = 1L,
      shareX = FALSE,
      shareY = FALSE
    ))
  }
} # /rtemis::plot_true_pred.ClassificationRes


# %% plot_roc.ClassificationRes ----
#' Plot ROC for ClassificationRes
#'
#' @param x `ClassificationRes` object.
#' @param what Character vector: "all", "training", "test". Which set(s) to plot.
#' @param theme `Theme` object.
#' @param col Character vector: Colors to use for the ROC curves.
#' @param filename Character: Filename to save the plot to.
#' @param ... Additional arguments passed to [draw_roc].
#'
#' @return plotly object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_roc, ClassificationRes) <- function(
  x,
  what = "all",
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  filename = NULL,
  ...
) {
  if (length(what) == 1 && what == "all") {
    what <- c("training", "test")
  }
  labelsl <- probl <- list()

  if ("training" %in% what) {
    labelsl[["Training"]] <- unlist(x@y_training, use.names = FALSE)
    probl[["Training"]] <- unlist(x@predicted_prob_training, use.names = FALSE)
  }
  if ("test" %in% what && !is.null(x@predicted_prob_test)) {
    labelsl[["Test"]] <- unlist(x@y_test, use.names = FALSE)
    probl[["Test"]] <- unlist(x@predicted_prob_test, use.names = FALSE)
  }

  draw_roc(
    true_labels = labelsl,
    predicted_prob = probl,
    theme = theme,
    palette = palette,
    legend_title = "Sample (AUC)",
    filename = filename,
    ...
  )
} # /rtemis::plot_roc.ClassificationRes


# %% plot_metric.SupervisedRes ----
#' Plot Metric SupervisedRes
#'
#' Plot boxplot of performance metrics across resamples.
#'
#' @param x `SupervisedRes` object.
#' @param what Character vector: "training", "test". What to print. Default is to print both.
#' @param metric Character: Metric to plot.
#' @param ylab Character: Label for the y-axis.
#' @param boxpoints Character:"all", "outliers" - How to display points in the boxplot.
#' @param theme `Theme` object.
#' @param ... Additional arguments passed to the plotting function.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(plot_metric, SupervisedRes) <- function(
  x,
  what = c("training", "test"),
  metric = NULL,
  ylab = labelify(metric),
  boxpoints = "all",
  theme = choose_theme(getOption("rtemis_theme")),
  ...
) {
  what <- match.arg(what, several.ok = TRUE)
  .class <- x@type == "Classification"

  # Metric
  if (is.null(metric)) {
    if (.class) {
      metric <- "Balanced_Accuracy"
    } else {
      metric <- "Rsq"
    }
  }

  xl <- list()
  if ("training" %in% what) {
    if (.class) {
      xl[["Training"]] <- sapply(
        x@metrics_training@res_metrics,
        function(fold) {
          fold[["Overall"]][[metric]]
        }
      )
    } else {
      xl[["Training"]] <- sapply(
        x@metrics_training@res_metrics,
        function(fold) {
          fold[[metric]]
        }
      )
    }
  }
  if ("test" %in% what) {
    if (.class) {
      xl[["Test"]] <- sapply(x@metrics_test@res_metrics, function(fold) {
        fold[["Overall"]][[metric]]
      })
    } else {
      xl[["Test"]] <- sapply(x@metrics_test@res_metrics, function(fold) {
        fold[[metric]]
      })
    }
  }

  # Boxplot ----
  draw_box(xl, theme = theme, ylab = ylab, boxpoints = boxpoints, ...)
} # /rtemis::plot_metric.SupervisedRes


# %% plot_varimp.Supervised ----
method(plot_varimp, Supervised) <- function(
  x,
  measure = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  filename = NULL,
  ...
) {
  if (is.null(x@varimp)) {
    msg(highlight2("No variable importance available."))
    return(invisible())
  }
  if (is.null(measure)) {
    vi <- x@varimp@data[[2L]]
  } else {
    vi <- x@varimp@data[[measure]]
  }
  names(vi) <- x@varimp@data[["variable"]]
  draw_varimp(vi, theme = theme, filename = filename, ...)
} # /rtemis::plot_varimp.Supervised


# %% plot_varimp.SupervisedRes ----
method(plot_varimp, SupervisedRes) <- function(
  x,
  measure = NULL,
  ylab = NULL,
  summarize_fn = "mean",
  show_top = 20L,
  theme = choose_theme(getOption("rtemis_theme")),
  filename = NULL,
  ...
) {
  if (is.null(x@varimp)) {
    msg(highlight2("No variable importance available."))
    return(invisible())
  }
  check_inherits(summarize_fn, "character")

  # Extract named numeric vectors from each VariableImportance object.
  # Not every variable gets a score in every resample, so rbindlist with fill.
  varimp_list <- lapply(x@varimp, function(z) {
    vi <- if (is.null(measure)) z@data[[2L]] else z@data[[measure]]
    names(vi) <- z@data[["variable"]]
    as.data.table(as.list(vi))
  })

  varimp <- rbindlist(varimp_list, use.names = TRUE, fill = TRUE)
  # Missing scores (variable absent in a resample) treated as 0
  setDF(varimp)
  varimp[is.na(varimp)] <- 0
  # Summarize and sort
  varimp_summary <- apply(varimp, 2, summarize_fn)
  varimp_sorted <- varimp_summary[order(-varimp_summary)]
  if (length(varimp_sorted) > show_top) {
    varimp_sorted <- varimp_sorted[seq_len(show_top)]
  }
  # ylab
  if (is.null(ylab)) {
    measure_name <- if (is.null(measure)) {
      names(x@varimp[[1L]]@data)[2L]
    } else {
      measure
    }
    ylab <- paste0(
      labelify(paste(summarize_fn, measure_name)),
      "\n(across ",
      desc(x@outer_resampler),
      ")"
    )
  }
  draw_varimp(
    varimp_sorted,
    theme = theme,
    ylab = ylab,
    filename = filename,
    ...
  )
} # /rtemis::plot_varimp.SupervisedRes


# %% make_SupervisedRes ----
#' Make SupervisedRes
#'
#' @author EDG
#' @keywords internal
#' @noRd
make_SupervisedRes <- function(
  algorithm,
  type,
  models,
  preprocessor,
  preprocessor_internal,
  hyperparameters,
  tuner_config,
  outer_resampler,
  execution_config,
  y_training,
  y_test,
  predicted_training,
  predicted_test,
  predicted_prob_training,
  predicted_prob_test,
  se_training = NULL,
  se_test = NULL,
  xnames = character(),
  varimp = NULL,
  question = character(),
  extra = NULL
) {
  if (type == "Classification") {
    ClassificationRes(
      algorithm = algorithm,
      models = models,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner_config = tuner_config,
      outer_resampler = outer_resampler,
      execution_config = execution_config,
      y_training = y_training,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_test = predicted_test,
      predicted_prob_training = predicted_prob_training,
      predicted_prob_test = predicted_prob_test,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra
    )
  } else {
    RegressionRes(
      algorithm = algorithm,
      models = models,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner_config = tuner_config,
      outer_resampler = outer_resampler,
      execution_config = execution_config,
      y_training = y_training,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_test = predicted_test,
      se_training = se_training,
      se_test = se_test,
      xnames = xnames,
      varimp = varimp,
      question = question,
      extra = extra
    )
  }
} # /rtemis::make_SupervisedRes

early_stopping_algs <- c("LightGBM", "LightRF", "LightRuleFit")


# LightRuleFit ----
#' @title LightRuleFit
#'
#' @description
#' Class for LightRuleFit models.
#'
#' @author EDG
#' @noRd
LightRuleFit <- new_class(
  name = "LightRuleFit",
  properties = list(
    model_lightgbm = Supervised,
    model_glmnet = Supervised,
    rules = class_character,
    rules_coefs = class_data.frame,
    rules_index = class_integer,
    rules_selected = class_character,
    rules_selected_formatted = class_character,
    rules_selected_formatted_coefs = class_data.frame,
    y_levels = class_character | NULL,
    xnames = class_character,
    complexity_metrics = class_data.frame
  )
) # /rtemis::LightRuleFit

# Print LightRuleFit ----
method(print, LightRuleFit) <- function(x, ...) {
  objcat("rtemis LightRuleFit Model")
  cat(
    "Trained using ",
    highlight(x@model_lightgbm@algorithm),
    " and ",
    highlight(x@model_glmnet@algorithm),
    ".\n",
    sep = ""
  )
  cat("Selected", highlight(length(x@rules_selected)), "rules.\n")
  invisible(x)
} # /rtemis::print.LightRuleFit


# get_metric Regression ----
method(get_metric, Regression) <- function(x, set, metric) {
  prop(prop(x, paste0("metrics_", set)), "metrics")[[metric]]
} # /get_metric.Regression

# get_metric Classification ----
method(get_metric, Classification) <- function(x, set, metric) {
  prop(prop(x, paste0("metrics_", set)), "metrics")[["Overall"]][[metric]]
} # /get_metric.Classification

# get_metric RegressionRes ----
method(get_metric, RegressionRes) <- function(x, set, metric) {
  sapply(
    prop(prop(x, paste0("metrics_", set)), "res_metrics"),
    function(r) {
      r[[metric]]
    }
  )
} # /rtemis::get_metric.RegressionRes

# get_metric ClassificationRes ----
method(get_metric, ClassificationRes) <- function(x, set, metric) {
  sapply(
    prop(prop(x, paste0("metrics_", set)), "res_metrics"),
    function(r) {
      r[["Overall"]][[metric]]
    }
  )
} # /rtemis::get_metric.ClassificationRes


# Describe list of Supervised/Res ----
#' Describe multiple Supervised or SupervisedRes objects
#'
#' @param x List of `Supervised` or `SupervisedRes` objects.
#' @param metric Character: Metric to use for description. Default is NULL, which uses "Balanced_Accuracy" for Classification and "Rsq" for Regression.
#' @param decimal_places Integer: Number of decimal places to round metrics to.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
method(desc, class_list) <- function(
  x,
  metric = NULL,
  decimal_places = 3L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  # Check all elements are Supervised or all are SupervisedRes objects
  if (
    !all(sapply(x, S7_inherits, Supervised)) &&
      !all(sapply(x, S7_inherits, SupervisedRes))
  ) {
    cli::cli_abort(
      "All elements must be either Supervised or SupervisedRes objects"
    )
  }
  type <- if (S7_inherits(x[[1]], SupervisedRes)) {
    "SupervisedRes"
  } else {
    "Supervised"
  }

  # Check that all models are of the same type
  if (!all(sapply(x, function(m) m@type == x[[1]]@type))) {
    cli::cli_abort(
      "All objects must be of the same supervised learning type (Classification or Regression)."
    )
  }

  # Name list using algorithm names
  if (is.null(names(x))) {
    names(x) <- sapply(x, function(m) m@algorithm)
  }
  suptype <- x[[1]]@type

  # SupervisedRes
  if (type == "SupervisedRes") {
    # Check that the same resampling method was used - ideally same seed, but do not enforce that, but report it
    # Get resampling config from each
    res_params <- lapply(x, function(m) m@outer_resampler@config)
    # Check all resamplers of same type
    if (!all(sapply(res_params, function(p) p@type == res_params[[1]]@type))) {
      cli::cli_warn(
        "All SupervisedRes objects must use the same resampling method."
      )
    }
    # ?replace with loop that checks all resampler params
    # Check all resamplers use same n
    if (!all(sapply(res_params, function(p) p@n == res_params[[1]]@n))) {
      cli::cli_warn(
        "All SupervisedRes objects must use the same number of resamples."
      )
    }

    # Describe SupervisedRes objects
    # 1. Report names of algorithms used.
    out <- paste0(
      oxfordcomma(sapply(x, function(m) desc_abb_alg(m@algorithm))),
      " were used for ",
      suptype,
      ".\n"
    )
    # 2. Get metric
    if (is.null(metric)) {
      metric <- if (suptype == "Classification") {
        "Balanced_Accuracy"
      } else {
        "Rsq"
      }
    }
    metricv <- sapply(x, function(m) m@metrics_test@mean_metrics[[metric]])
  } # /SupervisedRes
  if (type == "Supervised") {
    # 1. Report names of algorithms used.
    out <- paste0(
      oxfordcomma(sapply(x, function(m) desc_abb_alg(m@algorithm))),
      " were used for ",
      suptype,
      ".\n"
    )
    # 2. Get metric
    if (is.null(metric)) {
      metric <- if (suptype == "Classification") {
        "Balanced_Accuracy"
      } else {
        "Rsq"
      }
    }
    if (suptype == "Classification") {
      # Classification
      metricv <- sapply(x, function(m) {
        m@metrics_test@metrics[["Overall"]][[metric]]
      })
    } else {
      # Regression
      metricv <- sapply(x, function(m) m@metrics_test@metrics[[metric]])
    }
  } # /rtemis::Supervised

  # 3. Report mean metric across all models, sorting by performance
  metric_sorted <- sort(metricv, decreasing = TRUE)
  # => Get ties at specified decimal_places
  out <- paste0(
    out,
    "The top-performing model was ",
    bold(names(metric_sorted)[1], output_type = output_type),
    " with a test-set ",
    bold(labelify(metric), output_type = output_type),
    " of ",
    bold(
      ddSci(metric_sorted[1], decimal_places = decimal_places),
      output_type = output_type
    ),
    ", followed by ",
    oxfordcomma(names(metric_sorted[-1])),
    " with ",
    metric,
    " of ",
    oxfordcomma(ddSci(metric_sorted[-1], decimal_places = decimal_places)),
    " respectively."
  )
  out
} # /rtemis::desc.list


# %% describe.list(Supervised/Res) ----
#' Print description of a list of Supervised or SupervisedRes objects
#'
#' @param x List of `Supervised` or `SupervisedRes` objects.
#' @param ... See details.
#'
#' @details
#' Extra arguments:
#' - `metric`: Character: Metric to use for description. If NULL, defaults to "Balanced_Accuracy" for Classification and "Rsq" for Regression.
#' - `decimal_places`: Integer: Number of decimal places to round metrics to.
#' - `output_type`: Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character of description invisibly. Prints description to output.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
method(describe, class_list) <- function(
  x,
  metric = NULL,
  decimal_places = 3L,
  output_type = NULL,
  ...
) {
  out <- desc(
    x,
    metric = metric,
    decimal_places = decimal_places,
    output_type = output_type
  )
  cat(out, "\n")
  invisible(out)
} # /rtemis::describe.list(Supervised/Res)


================================================
FILE: R/08_MassUni.R
================================================
# S7_MassUni.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% MassGLM ----
#' @title MassGLM
#'
#' @description
#' Superclass for mass-univariate models.
#'
#' @author EDG
#' @noRd
MassGLM <- new_class(
  name = "MassGLM",
  properties = list(
    summary = class_data.table,
    ynames = class_character,
    xnames = class_character,
    coefnames = class_character,
    family = class_character
  )
) # /rtemis::MassGLM


# %% `$`.MassGLM ----
# Make MassGLM@name `$`-accessible ----
method(`$`, MassGLM) <- function(x, name) {
  prop(x, name)
}


# %% `.DollarNames`.MassGLM ----
# `$`-autocomplete MassGLM ----
method(`.DollarNames`, MassGLM) <- function(x, pattern = "") {
  prop_names <- names(props(x))
  grep(pattern, prop_names, value = TRUE)
}


# %% `[[`.MassGLM ----
# Make MassGLM@name `[[`-accessible ----
method(`[[`, MassGLM) <- function(x, name) {
  prop(x, name)
}


# %% repr.MassGLM ----
method(repr, MassGLM) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name("MassGLM", pad = pad),
    highlight(length(x@ynames)),
    " GLMs of family ",
    bold(x@family),
    " with ",
    highlight(length(x@xnames)),
    ngettext(length(x@xnames), " predictor", " predictors"),
    " each.",
    "\nAvailable coefficients: ",
    paste(highlight(x@coefnames), collapse = ", "),
    "\n"
  )
} # /rtemis::repr.MassGLM


# %% print.MassGLM ----
#' Print MassGLM
#'
#' @param x MassGLM object.
#' @param ... Not used.
#'
#' @return `x`, invisibly.
#'
#' @author EDG
#' @noRd
method(print, MassGLM) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.MassGLM


# %% plot.MassGLM ----
#' Plot MassGLM using volcano plot
#'
#' @param x MassGLM object trained using [massGLM].
#' @param coefname Character: Name of coefficient to plot. If `NULL`, the first coefficient is used.
#' @param p_adjust_method Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" -
#' p-value adjustment method.
#' @param p_transform Function to transform p-values for plotting. Default is `function(x) -log10(x)`.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param theme `Theme` object. Create using one of the `theme_` functions, e.g.
#' `theme_whitegrid()`.
#' @param verbosity Integer: Verbosity level.
#' @param ... Additional arguments passed to [draw_volcano].
#'
#' @return plotly object with volcano plot.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' set.seed(2019)
#' y <- rnormmat(500, 500, return_df = TRUE)
#' x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500))
#' mod <- massGLM(x, y)
#' plot(mod)
plot.MassGLM <- method(plot, MassGLM) <- function(
  x,
  coefname = NULL,
  p_adjust_method = "holm",
  p_transform = function(x) -log10(x),
  xlab = "Coefficient",
  ylab = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  verbosity = 1L,
  ...
) {
  if (is.null(coefname)) {
    coefname <- x@coefnames[1]
  }
  if (!coefname %in% x@coefnames) {
    cli::cli_abort(c(
      "i" = "{.var coefname} must be one of available coefnames: {.strong {x@coefnames}}",
      "x" = "You asked for: {.strong {coefname}}"
    ))
  }
  if (verbosity > 0L) {
    msg(
      "Plotting coefficients for",
      highlight(coefname),
      "x",
      length(x@ynames),
      "outcomes."
    )
  }

  # y-axis label ----
  if (is.null(ylab)) {
    ylab <- fn2label(p_transform, "p-value")
    ylab <- paste(ylab, "for", coefname)
    if (p_adjust_method != "none") {
      ylab <- paste0(ylab, " (", labelify(p_adjust_method), "-corrected)")
    }
  }

  # Plot ----
  coefs <- x@summary[[paste0("Coefficient_", coefname)]]
  pvals <- x@summary[[paste0("p_value_", coefname)]]
  draw_volcano(
    x = coefs,
    pvals = pvals,
    xnames = x@ynames,
    p_adjust_method = p_adjust_method,
    p_transform = p_transform,
    theme = theme,
    xlab = xlab,
    ylab = ylab,
    ...
  )
} # /rtemis::plot.MassGLM


# %% plot_manhattan.MassGLM ----
#' @name
#' plot_manhattan
#'
#' @param x MassGLM object.
#' @param coefname Character: Name of coefficient to plot. If `NULL`, the first coefficient is used.
#' @param p_adjust_method Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" -
#' p-value adjustment method.
#' @param p_transform Function to transform p-values for plotting. Default is `function(x) -log10(x)`.
#' @param ylab Character: y-axis label.
#' @param theme `Theme` object.
#' @param col_pos Character: Color for positive significant coefficients.
#' @param col_neg Character: Color for negative significant coefficients.
#' @param alpha Numeric: Transparency level for the bars.
#' @param ... Additional arguments passed to [draw_bar].
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # x: outcome of interest as first column, optional covariates in the other columns
#' # y: features whose association with x we want to study
#' set.seed(2022)
#' y <- data.table(rnormmat(500, 40))
#' x <- data.table(
#'   x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500),
#'   x2 = y[[21]] + rnorm(500)
#' )
#' massmod <- massGLM(x, y)
#' plot_manhattan(massmod)
plot_manhattan.MassGLM <- method(plot_manhattan, MassGLM) <- function(
  x,
  coefname = NULL,
  p_adjust_method = c(
    "holm",
    "hochberg",
    "hommel",
    "bonferroni",
    "BH",
    "BY",
    "fdr",
    "none"
  ),
  p_transform = function(x) -log10(x),
  ylab = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  col_pos = "#43A4AC",
  col_neg = "#FA9860",
  alpha = 0.8,
  ...
) {
  p_adjust_method <- match.arg(p_adjust_method)
  if (is.null(coefname)) {
    coefname <- x@coefnames[1]
  }
  if (!coefname %in% x@coefnames) {
    stop(
      "coefname must be one of the coefnames available in the MassGLM object."
    )
  }

  # y-axis label ----
  if (is.null(ylab)) {
    ylab <- fn2label(p_transform, "p-value")
    ylab <- paste(ylab, "for", coefname)
    if (p_adjust_method != "none") {
      ylab <- paste0(ylab, " (", labelify(p_adjust_method), "-corrected)")
    }
  }

  # Plot ----
  coefs <- x@summary[[paste0("Coefficient_", coefname)]]
  pvals <- x@summary[[paste0("p_value_", coefname)]]
  pvals <- p.adjust(pvals, method = p_adjust_method)
  signif_pos_idi <- pvals < 0.05 & coefs > 0
  signif_neg_idi <- pvals < 0.05 & coefs < 0
  col <- rep(
    adjustcolor(theme[["fg"]], alpha.f = alpha),
    length(pvals)
  )
  col[signif_pos_idi] <- adjustcolor(col_pos, alpha.f = alpha)
  col[signif_neg_idi] <- adjustcolor(col_neg, alpha.f = alpha)

  draw_bar(
    x = p_transform(pvals),
    theme = theme,
    palette = col,
    group_names = x@ynames,
    ylab = ylab,
    ...
  )
} # /rtemis::plot_manhattan.MassGLM


# %% summary.MassGLM ----
method(summary, MassGLM) <- function(object, ...) {
  object@summary
} # /rtemis::summary.MassGLM


================================================
FILE: R/09_ClusteringConfig.R
================================================
# S7_ClusteringConfig.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% ClusteringConfig ----
#' @title ClusteringConfig
#'
#' @description
#' Clustering config class.
#'
#' @field algorithm Character: Algorithm name.
#' @field config List: Algorithm-specific config.
#'
#' @author EDG
#' @keywords internal
#' @noRd
ClusteringConfig <- new_class(
  name = "ClusteringConfig",
  properties = list(
    algorithm = class_character,
    config = class_list
  )
) # /rtemis::ClusteringConfig


# %% `$`.ClusteringConfig ----
# Make ClusteringConfig@config `$`-accessible
method(`$`, ClusteringConfig) <- function(x, name) {
  x@config[[name]]
}


# %% `.DollarNames`.ClusteringConfig ----
# `$`-autocomplete ClusteringConfig@config ----
method(`.DollarNames`, ClusteringConfig) <- function(x, pattern = "") {
  all_names <- names(x@config)
  grep(pattern, all_names, value = TRUE)
}


# %% `[[`.ClusteringConfig ----
# Make ClusteringConfig@config `[[`-accessible
method(`[[`, ClusteringConfig) <- function(x, index) {
  x@config[[index]]
}


# %% repr.ClusteringConfig ----
method(repr, ClusteringConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  ...
) {
  output_type <- get_output_type(output_type)
  out <- repr_S7name(
    paste(x@algorithm, "ClusteringConfig"),
    pad = pad,
    output_type = output_type
  )
  paste0(
    out,
    repr_ls(props(x)[["config"]], pad = pad, output_type = output_type)
  )
} # /rtemis::repr.ClusteringConfig


# %% print.ClusteringConfig ----
#' Print Method for ClusteringConfig
#'
#' @param x ClusteringConfig object.
#' @param pad Integer: Left side padding.
#'
#' @return ClusteringConfig object, invisibly.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(print, ClusteringConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.ClusteringConfig


# %% KMeansConfig ----
#' @title KMeansConfig
#'
#' @description
#' ClusteringConfig subclass for K-means Clustering.
#'
#' @author EDG
#' @keywords internal
#' @noRd
KMeansConfig <- new_class(
  name = "KMeansConfig",
  parent = ClusteringConfig,
  constructor = function(k, dist) {
    k <- clean_posint(k)
    check_inherits(dist, "character")
    new_object(
      ClusteringConfig(
        algorithm = "KMeans",
        config = list(
          k = k,
          dist = dist
        )
      )
    )
  }
) # /rtemis::KMeansConfig


# %% setup_KMeans ----
#' Setup KMeansConfig
#'
#' @param k Number of clusters.
#' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'.
#'
#' @return KMeansConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' kmeans_config <- setup_KMeans(k = 4L, dist = "euclidean")
#' kmeans_config
setup_KMeans <- function(k = 3L, dist = c("euclidean", "manhattan")) {
  k <- clean_posint(k)
  dist <- match.arg(dist)
  KMeansConfig(k, dist)
} # /rtemis::setup_KMeans


# %% HardCLConfig ----
#' @title HardCLConfig
#'
#' @description
#' ClusteringConfig subclass for HardCL Clustering.
#'
#' @author EDG
#' @keywords internal
#' @noRd
HardCLConfig <- new_class(
  name = "HardCLConfig",
  parent = ClusteringConfig,
  constructor = function(k, dist) {
    k <- clean_posint(k)
    check_inherits(dist, "character")
    new_object(
      ClusteringConfig(
        algorithm = "HardCL",
        config = list(
          k = k,
          dist = dist
        )
      )
    )
  }
) # /rtemis::HardCLConfig


# %% setup_HardCL ----
#' Setup HardCLConfig
#'
#' @param k Number of clusters.
#' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'.
#'
#' @return HardCLConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' hardcl_config <- setup_HardCL(k = 4L, dist = "euclidean")
#' hardcl_config
setup_HardCL <- function(k = 3L, dist = c("euclidean", "manhattan")) {
  k <- clean_posint(k)
  dist <- match.arg(dist)
  HardCLConfig(k, dist)
} # /rtemis::setup_HardCL


# %% NeuralGasConfig ----
#' @title NeuralGasConfig
#'
#' @description
#' ClusteringConfig subclass for Neural Gas Clustering.
#'
#' @author EDG
#' @keywords internal
#' @noRd
NeuralGasConfig <- new_class(
  name = "NeuralGasConfig",
  parent = ClusteringConfig,
  constructor = function(k, dist) {
    k <- clean_posint(k)
    check_inherits(dist, "character")
    new_object(
      ClusteringConfig(
        algorithm = "NeuralGas",
        config = list(
          k = k,
          dist = dist
        )
      )
    )
  }
) # /rtemis::NeuralGasConfig


# %% setup_NeuralGas ----
#' Setup NeuralGasConfig
#'
#' @param k Number of clusters.
#' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'.
#'
#' @return NeuralGasConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' neuralgas_config <- setup_NeuralGas(k = 4L, dist = "euclidean")
#' neuralgas_config
setup_NeuralGas <- function(k = 3L, dist = c("euclidean", "manhattan")) {
  k <- clean_posint(k)
  dist <- match.arg(dist)
  NeuralGasConfig(k, dist)
} # /rtemis::setup_NeuralGas


# %% CMeansConfig ----
#' @title CMeansConfig
#'
#' @description
#' ClusteringConfig subclass for CMeans Clustering.
#'
#' @author EDG
#' @keywords internal
#' @noRd
CMeansConfig <- new_class(
  name = "CMeansConfig",
  parent = ClusteringConfig,
  constructor = function(
    k,
    max_iter,
    dist,
    method,
    m,
    rate_par,
    weights,
    control
  ) {
    k <- clean_posint(k)
    max_iter <- clean_posint(max_iter)
    check_character(dist)
    check_character(method)
    check_floatpos(m)
    check_float01inc(rate_par)
    check_inherits(weights, "numeric")
    check_inherits(control, "list")
    new_object(
      ClusteringConfig(
        algorithm = "CMeans",
        config = list(
          k = k,
          max_iter = max_iter,
          dist = dist,
          method = method,
          m = m,
          rate_par = rate_par,
          weights = weights,
          control = control
        )
      )
    )
  }
) # /rtemis::CMeansConfig


# %% setup_CMeans ----
#' Setup CMeansConfig
#'
#' @param k Integer: Number of clusters.
#' @param max_iter Integer: Maximum number of iterations.
#' @param dist Character: Distance measure to use: 'euclidean' or 'manhattan'.
#' @param method Character: "cmeans" - fuzzy c-means clustering; "ufcl": on-line update.
#' @param m Float (>1): Degree of fuzzification.
#' @param rate_par Float (0, 1): Learning rate for the online variant.
#' @param weights Float (>0): Case weights.
#' @param control List: Control config for clustering algorithm.
#'
#' @return CMeansConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' cmeans_config <- setup_CMeans(k = 4L, dist = "euclidean")
#' cmeans_config
setup_CMeans <- function(
  k = 2L,
  max_iter = 100L,
  dist = c("euclidean", "manhattan"),
  method = c("cmeans", "ufcl"),
  m = 2.0,
  rate_par = NULL,
  weights = 1.0,
  control = list()
) {
  k <- clean_posint(k)
  max_iter <- clean_posint(max_iter)
  dist <- match.arg(dist)
  method <- match.arg(method)
  check_floatpos(m)
  stopifnot(m > 1)
  check_float01inc(rate_par)
  check_inherits(weights, "numeric")
  CMeansConfig(
    k = k,
    max_iter = max_iter,
    dist = dist,
    method = method,
    m = m,
    rate_par = rate_par,
    weights = weights,
    control = control
  )
} # /rtemis::setup_CMeans


# %% DBSCANConfig ----
#' @title DBSCANConfig
#'
#' @description
#' ClusteringConfig subclass for DBSCAN Clustering.
#'
#' @author EDG
#' @keywords internal
#' @noRd
DBSCANConfig <- new_class(
  name = "DBSCANConfig",
  parent = ClusteringConfig,
  constructor = function(
    eps,
    min_points,
    weights,
    border_points,
    search,
    bucket_size,
    split_rule,
    approx
  ) {
    check_floatpos(eps)
    min_points <- clean_posint(min_points)
    check_inherits(weights, "numeric")
    check_inherits(border_points, "logical")
    check_inherits(search, "character")
    check_inherits(bucket_size, "integer")
    check_inherits(split_rule, "character")
    check_inherits(approx, "logical")
    new_object(
      ClusteringConfig(
        algorithm = "DBSCAN",
        config = list(
          eps = eps,
          min_points = min_points,
          weights = weights,
          border_points = border_points,
          search = search,
          bucket_size = bucket_size,
          split_rule = split_rule,
          approx = approx
        )
      )
    )
  }
) # /rtemis::DBSCANConfig


# %% setup_DBSCAN ----
#' Setup DBSCANConfig
#'
#' @param eps Float: Radius of neighborhood.
#' @param min_points Integer: Minimum number of points in a neighborhood to form a cluster.
#' @param weights Numeric vector: Weights for data points.
#' @param border_points Logical: If TRUE, assign border points to clusters.
#' @param search Character: Nearest neighbor search strategy: "kdtree", "linear", or "dist".
#' @param bucket_size Integer: Size of buckets for k-dtree search.
#' @param split_rule Character: Rule for splitting clusters: "SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR".
#' @param approx Logical: If TRUE, use approximate nearest neighbor search.
#' @return DBSCANConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' dbscan_config <- setup_DBSCAN(eps = 0.5, min_points = 5L)
#' dbscan_config
setup_DBSCAN <- function(
  eps = 0.5,
  min_points = 5L,
  weights = NULL,
  border_points = TRUE,
  search = c("kdtree", "linear", "dist"),
  bucket_size = 100L,
  split_rule = c("SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR"),
  approx = FALSE
) {
  check_floatpos(eps)
  min_points <- clean_posint(min_points)
  check_inherits(weights, "numeric")
  check_inherits(border_points, "logical")
  search <- match.arg(search)
  check_inherits(bucket_size, "integer")
  split_rule <- match.arg(split_rule)
  check_inherits(approx, "logical")
  DBSCANConfig(
    eps = eps,
    min_points = min_points,
    weights = weights,
    border_points = border_points,
    search = search,
    bucket_size = bucket_size,
    split_rule = split_rule,
    approx = approx
  )
} # /rtemis::setup_DBSCAN


================================================
FILE: R/10_Clustering.R
================================================
# S7_Clustering.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% Clustering ----
#' @title Clustering
#'
#' @description
#' Clustering class.
#'
#' @field algorithm Character: Algorithm name.
#' @field clust Any: Clustering object.
#' @field k Integer: Number of clusters.
#' @field clusters List: Cluster assignment.
#' @field config ClusteringConfig: Algorithm-specific config.
#'
#' @author EDG
#' @noRd
Clustering <- new_class(
  name = "Clustering",
  properties = list(
    algorithm = class_character,
    clust = class_any,
    k = class_integer,
    clusters = class_integer | class_list,
    config = ClusteringConfig
  )
) # /Clustering


# %% `$`.Clustering ----
# Make Clustering props `$`-accessible
method(`$`, Clustering) <- function(x, name) {
  prop(x, name)
}


# %% `.DollarNames`.Clustering ----
# `$`-autocomplete Clustering props
method(`.DollarNames`, Clustering) <- function(x, pattern = "") {
  prop_names <- names(props(x))
  grep(pattern, prop_names, value = TRUE)
}


# %% `[[`.Clustering ----
# Make Clustering props `[[`-accessible
method(`[[`, Clustering) <- function(x, index) {
  prop(x, index)
}


# %% repr.Clustering ----
method(repr, Clustering) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(paste(x@algorithm, "Clustering")),
    repr_ls(props(x)[-1], pad = pad, output_type = output_type)
  )
} # /rtemis::repr.Clustering


# %% print.Clustering ----
method(print, Clustering) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.Clustering


================================================
FILE: R/11_DecompositionConfig.R
================================================
# S7_DecompositionConfig.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% DecompositionConfig ----
#' @title DecompositionConfig
#'
#' @description
#' Decomposition config class.
#'
#' @field algorithm Character: Algorithm name.
#' @field config List: Algorithm-specific config.
#'
#' @author EDG
#' @noRd
DecompositionConfig <- new_class(
  name = "DecompositionConfig",
  properties = list(
    algorithm = class_character,
    config = class_list
  )
) # /DecompositionConfig


# %% `$`.DecompositionConfig ----
# Make DecompositionConfig@config `$`-accessible ----
method(`$`, DecompositionConfig) <- function(x, name) {
  x@config[[name]]
}


# %% `.DollarNames`.DecompositionConfig ----
# `$`-autocomplete DecompositionConfig@config ----
method(`.DollarNames`, DecompositionConfig) <- function(x, pattern = "") {
  all_names <- names(x@config)
  grep(pattern, all_names, value = TRUE)
}


# %% `[`.DecompositionConfig ----
# Make props `[`-accessible ----
method(`[`, DecompositionConfig) <- function(x, name) {
  props(x)[[name]]
}


# %% `[[`.DecompositionConfig ----
# Make DecompositionConfig@config `[[`-accessible ----
method(`[[`, DecompositionConfig) <- function(x, name) {
  x@config[[name]]
}


# %% repr.DecompositionConfig ----
#' Show Method for DecompositionConfig
#'
#' @param object DecompositionConfig object.
#' @param pad Integer: Left side padding.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return character
#'
#' @author EDG
#' @noRd
method(repr, DecompositionConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(
      paste(x["algorithm"], "DecompositionConfig"),
      pad = pad,
      output_type = output_type
    ),
    repr_ls(x["config"], pad = pad, limit = -1L, output_type = output_type)
  )
} # /rtemis::repr.DecompositionConfig


# %% print.DecompositionConfig ----
#' Print Method for DecompositionConfig
#'
#' @param x DecompositionConfig object.
#' @param pad Integer: Left side padding.
#' @param ... Not used.
#'
#' @return DecompositionConfig object, invisibly.
#'
#' @author EDG
#' @noRd
method(print, DecompositionConfig) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
}


# %% PCAConfig ----
#' @title PCAConfig
#'
#' @description
#' DecompositionConfig subclass for Principal Component Analysis.
#' Internal use only.
#'
#' @author EDG
#' @noRd
PCAConfig <- new_class(
  name = "PCAConfig",
  parent = DecompositionConfig,
  constructor = function(k, center, scale, tol) {
    k <- clean_posint(k)
    check_logical(center)
    check_logical(scale)
    check_float0pos(tol)
    new_object(
      DecompositionConfig(
        algorithm = "PCA",
        config = list(
          k = k,
          center = center,
          scale = scale,
          tol = tol
        )
      )
    )
  }
) # /rtemis::PCAConfig


# %% setup_PCA ----
#' Setup PCA config.
#'
#' @param k Integer: Number of components. (passed to `prcomp` `rank.`)
#' @param center Logical: If TRUE, center the data.
#' @param scale Logical: If TRUE, scale the data.
#' @param tol Numeric: Tolerance.
#'
#' @return PCAConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' pca_config <- setup_PCA(k = 3L)
#' pca_config
setup_PCA <- function(k = 3L, center = TRUE, scale = TRUE, tol = NULL) {
  k <- clean_posint(k)
  check_logical(center)
  check_logical(scale)
  check_float0pos(tol)
  PCAConfig(k, center, scale, tol)
} # /rtemis::setup_PCA


# %% ICAConfig ----
#' @title ICAConfig
#'
#' @description
#' DecompositionConfig subclass for Independent Component Analysis.
#' Internal use only.
#'
#' @author EDG
#' @noRd
ICAConfig <- new_class(
  name = "ICAConfig",
  parent = DecompositionConfig,
  constructor = function(k, type, fun, alpha, row_norm, maxit, tol) {
    new_object(
      DecompositionConfig(
        algorithm = "ICA",
        config = list(
          k = k,
          type = type,
          fun = fun,
          alpha = alpha,
          row_norm = row_norm,
          maxit = maxit,
          tol = tol
        )
      )
    )
  }
) # /rtemis::ICAConfig


# %% setup_ICA ----
#' @title setup_ICA
#'
#' @description
#' Setup ICA config.
#'
#' @param k Integer: Number of components.
#' @param type Character: Type of ICA: "parallel" or "deflation".
#' @param fun Character: ICA function: "logcosh", "exp".
#' @param alpha Numeric \[1, 2\]: Used in approximation to neg-entropy with `fun = "logcosh"`.
#' @param row_norm Logical: If TRUE, normalize rows of `x` before ICA.
#' @param maxit Integer: Maximum number of iterations.
#' @param tol Numeric: Tolerance.
#'
#' @return ICAConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ica_config <- setup_ICA(k = 3L)
#' ica_config
setup_ICA <- function(
  k = 3L,
  type = c("parallel", "deflation"),
  fun = c("logcosh", "exp"),
  alpha = 1.0,
  row_norm = TRUE,
  maxit = 100L,
  tol = 1e-04
) {
  k <- clean_posint(k)
  type <- match.arg(type)
  fun <- match.arg(fun)
  stopifnot(alpha >= 1, alpha <= 2)
  check_inherits(row_norm, "logical")
  maxit <- clean_posint(maxit)
  check_inherits(tol, "numeric")
  ICAConfig(
    k = k,
    type = type,
    fun = fun,
    alpha = alpha,
    row_norm = row_norm,
    maxit = maxit,
    tol = tol
  )
} # /rtemis::setup_ICA


# %% NMFConfig ----
#' @title NMFConfig
#'
#' @description
#' DecompositionConfig subclass for Non-negative Matrix Factorization.
#' Internal use only.
#'
#' @author EDG
#' @noRd
NMFConfig <- new_class(
  name = "NMFConfig",
  parent = DecompositionConfig,
  constructor = function(k, method, nrun) {
    k <- clean_posint(k)
    check_inherits(method, "character")
    nrun <- clean_posint(nrun)
    new_object(
      DecompositionConfig(
        algorithm = "NMF",
        config = list(
          k = k,
          method = method,
          nrun = nrun
        )
      )
    )
  }
) # /rtemis::NMFConfig


# %% setup_NMF ----
#' Setup NMF config.
#'
#' @param k Integer: Number of components.
#' @param method Character: NMF method. See `NMF::nmf`.
#' @param nrun Integer: Number of runs to perform.
#'
#' @return NMFConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' nmf_config <- setup_NMF(k = 3L)
#' nmf_config
setup_NMF <- function(
  k = 2L,
  method = "brunet",
  nrun = if (length(k) > 1L) 30L else 1L
) {
  k <- clean_posint(k)
  check_inherits(method, "character")
  nrun <- clean_posint(nrun)
  NMFConfig(k, method, nrun)
} # /rtemis::setup_NMF


# %% UMAPConfig ----
#' @title UMAPConfig
#'
#' @description
#' DecompositionConfig subclass for Uniform Manifold Approximation and Projection.
#' Internal use only.
#'
#' @author EDG
#' @noRd
UMAPConfig <- new_class(
  name = "UMAPConfig",
  parent = DecompositionConfig,
  constructor = function(
    k,
    n_neighbors,
    init,
    metric,
    n_epochs,
    learning_rate,
    scale
  ) {
    k <- clean_posint(k)
    n_neighbors <- clean_posint(n_neighbors)
    check_inherits(init, "character")
    check_inherits(metric, "character")
    n_epochs <- clean_posint(n_epochs)
    check_float0pos(learning_rate)
    check_inherits(scale, "logical")
    new_object(
      DecompositionConfig(
        algorithm = "UMAP",
        config = list(
          k = k,
          n_neighbors = n_neighbors,
          init = init,
          metric = metric,
          n_epochs = n_epochs,
          learning_rate = learning_rate,
          scale = scale
        )
      )
    )
  }
) # /rtemis::UMAPConfig


# %% setup_UMAP ----
#' Setup UMAP config.
#'
#' @details
#' A high `n_neighbors` value may give error in some systems:
#' "Error in irlba::irlba(L, nv = n, nu = 0, maxit = iters) :
#'  function 'as_cholmod_sparse' not provided by package 'Matrix'"
#'
#' @param k Integer: Number of components.
#' @param n_neighbors Integer: Number of keighbors.
#' @param init Character: Initialization type. See `uwot::umap "init"`.
#' @param metric Character: Distance metric to use: "euclidean", "cosine",
#' "manhattan", "hamming", "categorical".
#' @param n_epochs Integer: Number of epochs.
#' @param learning_rate Float: Learning rate.
#' @param scale Logical: If TRUE, scale input data before doing UMAP.
#'
#' @return UMAPConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' umap_config <- setup_UMAP(k = 3L)
#' umap_config
setup_UMAP <- function(
  k = 2L,
  n_neighbors = 15L,
  init = "spectral",
  metric = c("euclidean", "cosine", "manhattan", "hamming", "categorical"),
  n_epochs = NULL,
  learning_rate = 1.0,
  scale = TRUE
) {
  k <- clean_posint(k)
  n_neighbors <- clean_posint(n_neighbors)
  init <- match.arg(init)
  metric <- match.arg(metric)
  check_inherits(n_epochs, "integer")
  check_float0pos(learning_rate)
  check_inherits(scale, "logical")
  UMAPConfig(
    k = k,
    n_neighbors = n_neighbors,
    init = init,
    metric = metric,
    n_epochs = n_epochs,
    learning_rate = learning_rate,
    scale = scale
  )
} # /rtemis::setup_UMAP


# %% tSNEConfig ----
#' @title tSNEConfig
#'
#' @description
#' DecompositionConfig subclass for t-Distributed Stochastic Neighbor Embedding.
#'
#' @author EDG
#' @noRd
tSNEConfig <- new_class(
  name = "tSNEConfig",
  parent = DecompositionConfig,
  constructor = function(
    k = NULL,
    initial_dims = NULL,
    perplexity = NULL,
    theta = NULL,
    check_duplicates = NULL,
    pca = NULL,
    partial_pca = NULL,
    max_iter = NULL,
    verbose = NULL,
    is_distance = NULL,
    Y_init = NULL,
    pca_center = NULL,
    pca_scale = NULL,
    normalize = NULL,
    stop_lying_iter = NULL,
    mom_switch_iter = NULL,
    momentum = NULL,
    final_momentum = NULL,
    eta = NULL,
    exaggeration_factor = NULL,
    num_threads = NULL
  ) {
    k <- clean_posint(k)
    initial_dims <- clean_posint(initial_dims)
    check_logical(check_duplicates)
    check_logical(pca)
    check_logical(partial_pca)
    max_iter <- clean_posint(max_iter)
    check_logical(verbose)
    check_logical(is_distance)
    check_inherits(Y_init, "matrix")
    check_logical(pca_center)
    check_logical(pca_scale)
    check_logical(normalize)
    stop_lying_iter <- clean_posint(stop_lying_iter)
    mom_switch_iter <- clean_posint(mom_switch_iter)
    num_threads <- clean_posint(num_threads)
    new_object(
      DecompositionConfig(
        algorithm = "tSNE",
        config = list(
          k = k,
          initial_dims = initial_dims,
          perplexity = perplexity,
          theta = theta,
          check_duplicates = check_duplicates,
          pca = pca,
          partial_pca = partial_pca,
          max_iter = max_iter,
          verbose = verbose,
          is_distance = is_distance,
          Y_init = Y_init,
          pca_center = pca_center,
          pca_scale = pca_scale,
          normalize = normalize,
          stop_lying_iter = stop_lying_iter,
          mom_switch_iter = mom_switch_iter,
          momentum = momentum,
          final_momentum = final_momentum,
          eta = eta,
          exaggeration_factor = exaggeration_factor,
          num_threads = num_threads
        )
      )
    )
  }
) # /rtemis::tSNEConfig


# %% setup_tSNE ----
#' Setup tSNE config.
#'
#' @details
#' Get more information on the config by running `?Rtsne::Rtsne`.
#'
#' @param k Integer: Number of components.
#' @param initial_dims Integer: Initial dimensions.
#' @param perplexity Integer: Perplexity.
#' @param theta Float: Theta.
#' @param check_duplicates Logical: If TRUE, check for duplicates.
#' @param pca Logical: If TRUE, perform PCA.
#' @param partial_pca Logical: If TRUE, perform partial PCA.
#' @param max_iter Integer: Maximum number of iterations.
#' @param verbose Logical: If TRUE, print messages.
#' @param is_distance Logical: If TRUE, `x` is a distance matrix.
#' @param Y_init Matrix: Initial Y matrix.
#' @param pca_center Logical: If TRUE, center PCA.
#' @param pca_scale Logical: If TRUE, scale PCA.
#' @param normalize Logical: If TRUE, normalize.
#' @param stop_lying_iter Integer: Stop lying iterations.
#' @param mom_switch_iter Integer: Momentum switch iterations.
#' @param momentum Float: Momentum.
#' @param final_momentum Float: Final momentum.
#' @param eta Float: Eta.
#' @param exaggeration_factor Float: Exaggeration factor.
#' @param num_threads Integer: Number of threads.
#'
#' @return tSNEConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' tSNE_config <- setup_tSNE(k = 3L)
#' tSNE_config
setup_tSNE <- function(
  k = 2L,
  initial_dims = 50L,
  perplexity = 30,
  theta = 0.5,
  check_duplicates = TRUE,
  pca = TRUE,
  partial_pca = FALSE,
  max_iter = 1000L,
  verbose = getOption("verbose", FALSE),
  is_distance = FALSE,
  Y_init = NULL,
  pca_center = TRUE,
  pca_scale = FALSE,
  normalize = TRUE,
  stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L),
  mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L),
  momentum = 0.5,
  final_momentum = 0.8,
  eta = 200,
  exaggeration_factor = 12,
  num_threads = 1L
) {
  tSNEConfig(
    k = k,
    initial_dims = initial_dims,
    perplexity = perplexity,
    theta = theta,
    check_duplicates = check_duplicates,
    pca = pca,
    partial_pca = partial_pca,
    max_iter = max_iter,
    verbose = verbose,
    is_distance = is_distance,
    Y_init = Y_init,
    pca_center = pca_center,
    pca_scale = pca_scale,
    normalize = normalize,
    stop_lying_iter = stop_lying_iter,
    mom_switch_iter = mom_switch_iter,
    momentum = momentum,
    final_momentum = final_momentum,
    eta = eta,
    exaggeration_factor = exaggeration_factor,
    num_threads = num_threads
  )
} # /rtemis::setup_tSNE


# %% IsomapConfig ----
#' @title IsomapConfig
#'
#' @description
#' DecompositionConfig subclass for Isomap.
#'
#' @author EDG
#' @noRd
IsomapConfig <- new_class(
  name = "IsomapConfig",
  parent = DecompositionConfig,
  constructor = function(
    k,
    dist_method = NULL,
    nsd = NULL,
    path = NULL
  ) {
    k <- clean_posint(k)
    check_inherits(dist_method, "character")
    nsd <- clean_int(nsd)
    check_inherits(path, "character")
    new_object(
      DecompositionConfig(
        algorithm = "Isomap",
        config = list(
          k = k,
          dist_method = dist_method,
          nsd = nsd,
          path = path
        )
      )
    )
  }
) # /rtemis::IsomapConfig


# %% setup_Isomap ----
#' Setup Isomap config.
#'
#' @param k Integer: Number of components.
#' @param dist_method Character: Distance method.
#' @param nsd Integer: Number of shortest dissimilarities retained.
#' @param path Character: Path argument for `vegan::isomap`.
#'
#' @return IsomapConfig object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' isomap_config <- setup_Isomap(k = 3L)
#' isomap_config
setup_Isomap <- function(
  k = 2L,
  dist_method = c("euclidean", "manhattan"),
  nsd = 0L,
  path = c("shortest", "extended")
) {
  k <- clean_posint(k)
  dist_method <- match.arg(dist_method)
  nsd <- clean_int(nsd)
  path <- match.arg(path)
  IsomapConfig(k, dist_method, nsd, path)
} # /rtemis::setup_Isomap


================================================
FILE: R/12_Decomposition.R
================================================
# S7_Decomposition.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% Decomposition ----
#' @title Decomposition
#'
#' @description
#' Decomposition class.
#'
#' @field algorithm Character: Algorithm name.
#' @field decom Any: Decomposition object.
#' @field config List: Algorithm-specific config.
#' @field decom: Decomposition model.
#' @field transformed: transformedransformed data, i.e. either a projection or an embedding of the input data.
#'
#' @author EDG
#' @noRd
Decomposition <- new_class(
  name = "Decomposition",
  properties = list(
    algorithm = class_character,
    config = DecompositionConfig,
    decom = class_any,
    transformed = class_any
  )
) # /rtemis::Decomposition


# %% `$`.Decomposition ----
# Make Decomposition properties `$`-accessible
method(`$`, Decomposition) <- function(x, name) {
  prop_names <- names(props(x))
  if (name %in% prop_names) {
    prop(x, name)
  } else {
    cli::cli_abort(paste0(
      "No property named '",
      name,
      "' in Decomposition object."
    ))
  }
}


# %% `.DollarNames`.Decomposition ----
method(`.DollarNames`, Decomposition) <- function(x, pattern = "") {
  prop_names <- names(props(x))
  grep(pattern, prop_names, value = TRUE)
}


# %% `[[`.Decomposition ----
# Make Decomposition@transformed `[[`-accessible
method(`[[`, Decomposition) <- function(x, index) {
  props(x, "transformed")[[index]]
}


# %% repr.Decomposition ----
method(repr, Decomposition) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name(
      paste(x@algorithm, "Decomposition"),
      pad = pad,
      output_type = output_type
    ),
    repr_ls(props(x)[-1], pad = pad, output_type = output_type)
  )
} # /rtemis::repr.Decomposition


# %% print.Decomposition ----
method(print, Decomposition) <- function(
  x,
  pad = 0L,
  output_type = NULL,
  ...
) {
  cat(repr(x, pad = pad, output_type = output_type))
  invisible(x)
} # /rtemis::print.Decomposition


================================================
FILE: R/13_Themes.R
================================================
# S7_Themes.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% Theme ----
#' Theme
#'
#' @field name Character: Name of theme.
#' @field config Named list of theme config.
#'
#' @author EDG
#' @noRd
Theme <- new_class(
  name = "Theme",
  properties = list(
    name = class_character,
    config = class_list
  )
) # /Theme


# %% print.Theme ----
#' Print Theme
#'
#' Print Theme object
#'
#' @param x `Theme` object.
#' @param ... Not used.
#'
#' @author EDG
#' @noRd
method(print, Theme) <- function(x, ...) {
  objcat(paste(x@name, "Theme"))
  printls(props(x)[["config"]])
  invisible(x)
}


# %% `$`.Theme ----
# Make Theme@config `$`-accessible with autocomplete ----
method(`$`, Theme) <- function(x, name) {
  x@config[[name]]
} # /rtemis::Theme$


# %% `.DollarNames`.Theme ----
method(`.DollarNames`, Theme) <- function(x, pattern = "") {
  all_names <- names(x@config)
  grep(pattern, all_names, value = TRUE)
} # /rtemis::Theme.DollarNames


# %% `[[`.Theme ----
# Make Theme@config `[[`-accessible ----
method(`[[`, Theme) <- function(x, name) {
  x@config[[name]]
} # /rtemis::Theme[[]]


# %% names.Theme ----
#' Get names of Theme object
#'
#' @param x `Theme` object.
#'
#' @return Character vector of names of `Theme` object.
#'
#' @author EDG
#' @noRd
method(names, Theme) <- function(x) {
  names(x@config)
} # /rtemis::names.Theme


================================================
FILE: R/14_SuperConfig.R
================================================
# S7_SuperConfig.R
# ::rtemis::
# 2025- EDG rtemis.org

# References ----
# https://github.com/RConsortium/S7
# https://rconsortium.github.io/S7/

# %% SuperConfig ----
#' SuperConfig Class
#'
#' @description
#' Supervised Learning Configuration Class.
#'
#' @author EDG
#' @noRd
SuperConfig <- new_class(
  name = "SuperConfig",
  properties = list(
    dat_training_path = class_character,
    dat_validation_path = class_character | NULL,
    dat_test_path = class_character | NULL,
    weights = class_character | NULL, # column name in dat_training
    preprocessor_config = PreprocessorConfig | NULL,
    algorithm = class_character | NULL,
    hyperparameters = Hyperparameters | NULL,
    tuner_config = TunerConfig | NULL,
    outer_resampling_config = ResamplerConfig | NULL,
    execution_config = ExecutionConfig,
    question = class_character | NULL,
    outdir = class_character,
    verbosity = class_integer
  )
) # /rtemis::SuperConfig


# %% repr.SuperConfig ----
#' Repr SuperConfig
#'
#' @param x `SuperConfig` object.
#' @param pad Integer: Number of spaces to pad the message with.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted string that can be printed with cat()
#'
#' @author EDG
#' @noRd
method(repr, SuperConfig) <- function(x, pad = 0L, output_type = NULL) {
  out <- repr_S7name("SuperConfig", pad = pad, output_type = output_type)
  out <- paste0(
    out,
    repr_ls(props(x), pad = pad, limit = 20L, output_type = output_type)
  )
  out
} # /rtemis::repr.SuperConfig


# %% print.SuperConfig ----
#' Print `SuperConfig`
#'
#' Print `SuperConfig` object
#'
#' @param x `SuperConfig` object.
#' @param ... Not used.
#'
#' @author EDG
#' @noRd
method(print, SuperConfig) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.SuperConfig


# %% setup_SuperConfig ----
#' Setup SuperConfig
#'
#' Setup `SuperConfig` object.
#'
#' @param dat_training_path Character: Path to training data file.
#' @param dat_validation_path Character: Path to validation data file.
#' @param dat_test_path Character: Path to test data file.
#' @param weights Optional Character: Column name in training data to use as observation weights.
#' If NULL, no weights are used.
#' @param preprocessor_config `PreprocessorConfig` object: Configuration for data preprocessing.
#' @param algorithm Character: Algorithm to use for training.
#' @param hyperparameters `Hyperparameters` object: Configuration for model hyperparameters.
#' @param tuner_config `TunerConfig` object: Configuration for hyperparameter tuning.
#' @param outer_resampling_config `ResamplerConfig` object: Configuration for outer res
#' resampling during model training.
#' @param execution_config `ExecutionConfig` object: Configuration for execution settings. Setup
#' with [setup_ExecutionConfig].
#' @param question Character: Question to answer with the supervised learning analysis.
#' @param outdir Character: Output directory for results.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `SuperConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' sc <- setup_SuperConfig(
#'   dat_training_path = "train.csv",
#'   preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
#'   algorithm = "LightRF",
#'   hyperparameters = setup_LightRF(),
#'   tuner_config = setup_GridSearch(),
#'   outer_resampling_config = setup_Resampler(),
#'   execution_config = setup_ExecutionConfig(),
#'   question = "Can we tell iris species apart given their measurements?",
#'   outdir = "models/"
#' )
setup_SuperConfig <- function(
  dat_training_path,
  dat_validation_path = NULL,
  dat_test_path = NULL,
  weights = NULL,
  preprocessor_config = NULL,
  algorithm = NULL,
  hyperparameters = NULL,
  tuner_config = NULL,
  outer_resampling_config = NULL,
  execution_config = setup_ExecutionConfig(),
  question = NULL,
  outdir = "results/",
  verbosity = 1L
) {
  # Sanitize paths for security
  dat_training_path <- sanitize_path(dat_training_path, must_exist = FALSE)

  if (!is.null(dat_validation_path)) {
    dat_validation_path <- sanitize_path(
      dat_validation_path,
      must_exist = FALSE
    )
  }

  if (!is.null(dat_test_path)) {
    dat_test_path <- sanitize_path(dat_test_path, must_exist = FALSE)
  }

  outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any")

  SuperConfig(
    dat_training_path = dat_training_path,
    dat_validation_path = dat_validation_path,
    dat_test_path = dat_test_path,
    weights = weights,
    preprocessor_config = preprocessor_config,
    algorithm = algorithm,
    hyperparameters = hyperparameters,
    tuner_config = tuner_config,
    outer_resampling_config = outer_resampling_config,
    execution_config = execution_config,
    question = question,
    outdir = outdir,
    verbosity = verbosity
  )
} # /setup_SuperConfig


# %% to_toml.SuperConfig ----
#' Convert `SuperConfig` to TOML
#'
#' Convert `SuperConfig` object to TOML format for saving to file that can be read back in with
#' `read_config()`.
#'
#' @param x `SuperConfig` object.
#'
#' @return Character: TOML string representation of the `SuperConfig` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(to_toml, SuperConfig) <- function(x) {
  check_dependencies("toml")
  xl <- S7_to_list(props(x))
  toml_with_meta(x, xl)
} # /rtemis::to_toml.SuperConfig


# %% write_toml.SuperConfig ----
#' @name
#' write_toml
#'
#' @param x `SuperConfig` object.
#' @param file Character: Path to output TOML file.
#' @param overwrite Logical: If TRUE, overwrite existing file.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `SuperConfig` object, invisibly.
#'
#' @author EDG
#' @rdname write_toml
#'
#' @examples
#' x <- setup_SuperConfig(
#'   dat_training_path = "~/Data/iris.csv",
#'   dat_validation_path = NULL,
#'   dat_test_path = NULL,
#'   weights = NULL,
#'   preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
#'   algorithm = "LightRF",
#'   hyperparameters = setup_LightRF(),
#'   tuner_config = setup_GridSearch(),
#'   outer_resampling_config = setup_Resampler(),
#'   execution_config = setup_ExecutionConfig(),
#'   question = "Can we tell iris species apart given their measurements?",
#'   outdir = "models/",
#'   verbosity = 1L
#' )
#' tmpdir <- tempdir()
#' write_toml(x, file.path(tmpdir, "rtemis.toml"))
method(write_toml, SuperConfig) <- function(
  x,
  file,
  overwrite = FALSE,
  verbosity = 1L
) {
  toml_str <- to_toml(x)
  write_lines(
    toml_str,
    file = file,
    overwrite = overwrite,
    verbosity = verbosity
  )
  invisible(x)
} # /rtemis::write_toml.SuperConfig


# %% read_config ----
#' Read `SuperConfig` from TOML file
#'
#' Read `SuperConfig` object from TOML file that was written with `write_toml()`.
#'
#' @param file Character: Path to input TOML file.
#'
#' @return `SuperConfig` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # Create a SuperConfig object
#' x <- setup_SuperConfig(
#'   dat_training_path = "~/Data/iris.csv",
#'   algorithm = "LightRF",
#'   hyperparameters = setup_LightRF()
#' )
#' # Write TOML file
#' tmpdir <- tempdir()
#' tmpfile <- file.path(tmpdir, "rtemis_test.toml")
#' write_toml(x, tmpfile)
#' # Read config from TOML file
#' x_read <- read_config(tmpfile)
read_config <- function(file) {
  check_dependencies("toml")
  file <- sanitize_path(file, must_exist = TRUE, type = "file")
  xl <- toml::read_toml(file)
  xl <- toml_empty_to_null(xl)
  # Convert list to SuperConfig object

  setup_SuperConfig(
    dat_training_path = xl[["dat_training_path"]],
    dat_validation_path = xl[["dat_validation_path"]],
    dat_test_path = xl[["dat_test_path"]],
    weights = xl[["weights"]],
    preprocessor_config = if (is.null(xl[["preprocessor_config"]])) {
      NULL
    } else {
      do.call(setup_Preprocessor, xl[["preprocessor_config"]])
    },
    algorithm = xl[["algorithm"]],
    hyperparameters = if (is.null(xl[["hyperparameters"]])) {
      NULL
    } else {
      .list_to_Hyperparameters(xl[["hyperparameters"]])
    },
    tuner_config = if (is.null(xl[["tuner_config"]])) {
      NULL
    } else {
      .list_to_TunerConfig(xl[["tuner_config"]])
    },
    outer_resampling_config = if (is.null(xl[["outer_resampling_config"]])) {
      NULL
    } else {
      .list_to_ResamplerConfig(xl[["outer_resampling_config"]])
    },
    execution_config = if (is.null(xl[["execution_config"]])) {
      setup_ExecutionConfig()
    } else {
      do.call(setup_ExecutionConfig, xl[["execution_config"]])
    },
    question = iflengthy(xl[["question"]]),
    outdir = iflengthy(xl[["outdir"]]),
    verbosity = iflengthy(xl[["verbosity"]])
  )
} # /rtemis::read_config


# %% to_yaml.SuperConfig ----
#' Convert `SuperConfig` to YAML
#'
#' Convert `SuperConfig` object to YAML format for saving to file that can be read back in with
#' `read_config()`.
#'
#' @param x `SuperConfig` object.
#'
#' @return Character: YAML string representation of the `SuperConfig` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(to_yaml, SuperConfig) <- function(x) {
  xl <- S7_to_list(props(x))
  yaml::as.yaml(xl)
} # /rtemis::to_yaml.SuperConfig


# %% SuperConfigLive ----
#' SuperConfigLive Class
#'
#' @description
#' Like `SuperConfig`, but carries in-memory training/validation/test data
#' instead of file paths. Used by `rtemislive` (uploads arrive over a WS
#' frame, not as a file) and by future HPC submission paths that hand the
#' data directly to a worker.
#'
#' Not TOML-serialisable — in-memory data does not round-trip cleanly to
#' a config file. Use `SuperConfig` when you need on-disk reproducibility.
#'
#' @author EDG
#' @noRd
SuperConfigLive <- new_class(
  name = "SuperConfigLive",
  properties = list(
    dat_training = class_tabular,
    dat_validation = class_tabular | NULL,
    dat_test = class_tabular | NULL,
    weights = class_character | NULL, # column name in dat_training
    preprocessor_config = PreprocessorConfig | NULL,
    algorithm = class_character | NULL,
    hyperparameters = Hyperparameters | NULL,
    tuner_config = TunerConfig | NULL,
    outer_resampling_config = ResamplerConfig | NULL,
    execution_config = ExecutionConfig,
    question = class_character | NULL,
    outdir = class_character | NULL,
    verbosity = class_integer
  )
) # /rtemis::SuperConfigLive


# %% repr.SuperConfigLive ----
#' @author EDG
#' @noRd
method(repr, SuperConfigLive) <- function(x, pad = 0L, output_type = NULL) {
  out <- repr_S7name("SuperConfigLive", pad = pad, output_type = output_type)
  # Replace heavy data slots with a {rows, cols} summary so the printout
  # stays readable.
  pl <- props(x)
  fmt_dim <- function(d) {
    if (is.null(d)) {
      return(NULL)
    }
    paste0("<", NROW(d), " x ", NCOL(d), ">")
  }
  pl[["dat_training"]] <- fmt_dim(pl[["dat_training"]])
  pl[["dat_validation"]] <- fmt_dim(pl[["dat_validation"]])
  pl[["dat_test"]] <- fmt_dim(pl[["dat_test"]])
  out <- paste0(
    out,
    repr_ls(pl, pad = pad, limit = 20L, output_type = output_type)
  )
  out
} # /rtemis::repr.SuperConfigLive


# %% print.SuperConfigLive ----
#' @author EDG
#' @noRd
method(print, SuperConfigLive) <- function(x, output_type = NULL, ...) {
  cat(repr(x, output_type = output_type))
  invisible(x)
} # /rtemis::print.SuperConfigLive


# %% setup_SuperConfigLive ----
#' Setup SuperConfigLive
#'
#' Build a `SuperConfigLive` — same shape as [setup_SuperConfig] but with
#' in-memory tabular data instead of file paths.
#'
#' @param dat_training data.frame or data.table. Training data.
#' @param dat_validation data.frame, data.table, or `NULL`.
#' @param dat_test data.frame, data.table, or `NULL`.
#' @param weights Character or `NULL`. Column name in `dat_training` used
#'   as observation weights.
#' @param preprocessor_config,algorithm,hyperparameters,tuner_config,outer_resampling_config,execution_config,question,verbosity
#'   See [setup_SuperConfig].
#' @param outdir Character or `NULL`. Output directory; `NULL` (the
#'   default) means "do not write to disk" (the rtemislive case).
#'
#' @return `SuperConfigLive` object.
#'
#' @author EDG
#' @export
setup_SuperConfigLive <- function(
  dat_training,
  dat_validation = NULL,
  dat_test = NULL,
  weights = NULL,
  preprocessor_config = NULL,
  algorithm = NULL,
  hyperparameters = NULL,
  tuner_config = NULL,
  outer_resampling_config = NULL,
  execution_config = setup_ExecutionConfig(),
  question = NULL,
  outdir = NULL,
  verbosity = 1L
) {
  if (!is.null(outdir)) {
    outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any")
  }
  SuperConfigLive(
    dat_training = dat_training,
    dat_validation = dat_validation,
    dat_test = dat_test,
    weights = weights,
    preprocessor_config = preprocessor_config,
    algorithm = algorithm,
    hyperparameters = hyperparameters,
    tuner_config = tuner_config,
    outer_resampling_config = outer_resampling_config,
    execution_config = execution_config,
    question = question,
    outdir = outdir,
    verbosity = as.integer(verbosity)
  )
} # /rtemis::setup_SuperConfigLive


================================================
FILE: R/15_CheckData.R
================================================
# CheckData.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% CheckData ----
#' @author EDG
#' @noRd
CheckData <- new_class(
  name = "CheckData",
  properties = list(
    object_class = class_character,
    name = class_character,
    n_rows = class_integer,
    n_cols = class_integer,
    n_numeric = class_integer,
    n_integer = class_integer,
    n_character = class_integer,
    n_factor = class_integer,
    n_ordered = class_integer,
    n_date = class_integer,
    n_constant = class_integer,
    n_duplicates = class_integer,
    n_cols_anyna = class_integer,
    n_na = class_integer,
    classes_na = class_any | NULL,
    na_feature_pct = class_data.frame | NULL,
    na_case_pct = class_data.frame | NULL,
    n_na_last_col = class_integer | NULL
  ),
  constructor = function(
    object_class,
    name,
    n_rows,
    n_cols,
    n_numeric,
    n_integer,
    n_character,
    n_factor,
    n_ordered,
    n_date,
    n_constant,
    n_duplicates,
    n_cols_anyna,
    n_na,
    classes_na = NULL,
    na_feature_pct = NULL,
    na_case_pct = NULL,
    n_na_last_col = NULL
  ) {
    n_rows <- clean_int(n_rows)
    n_cols <- clean_int(n_cols)
    n_numeric <- clean_int(n_numeric)
    n_integer <- clean_int(n_integer)
    n_character <- clean_int(n_character)
    n_factor <- clean_int(n_factor)
    n_ordered <- clean_int(n_ordered)
    n_date <- clean_int(n_date)
    n_constant <- clean_int(n_constant)
    n_duplicates <- clean_int(n_duplicates)
    n_cols_anyna <- clean_int(n_cols_anyna)
    n_na <- clean_int(n_na)
    check_inherits(na_feature_pct, "data.frame")
    check_inherits(na_case_pct, "data.frame")
    n_na_last_col <- clean_int(n_na_last_col)
    new_object(
      S7_object(),
      object_class = object_class,
      name = name,
      n_rows = n_rows,
      n_cols = n_cols,
      n_numeric = n_numeric,
      n_integer = n_integer,
      n_character = n_character,
      n_factor = n_factor,
      n_ordered = n_ordered,
      n_date = n_date,
      n_constant = n_constant,
      n_duplicates = n_duplicates,
      n_cols_anyna = n_cols_anyna,
      n_na = n_na,
      classes_na = classes_na,
      na_feature_pct = na_feature_pct,
      na_case_pct = na_case_pct,
      n_na_last_col = n_na_last_col
    )
  }
) # /rtemis::CheckData


# %% `$`.CheckData ----
# Make CheckData properties `$`-accessible
method(`$`, CheckData) <- function(x, name) {
  prop(x, name)
} # /rtemis::`$`.CheckData


# %% `.DollarNames`.CheckData ----
# `$`-autocomplete CheckData properties
method(`.DollarNames`, CheckData) <- function(x, pattern = "") {
  all_names <- names(x)
  grep(pattern, all_names, value = TRUE)
} # /rtemis::`.DollarNames`.CheckData


# %% `[[`.CheckData ----
# Make CheckData properties `[[`-accessible
method(`[[`, CheckData) <- function(x, name) {
  prop(x, name)
} # /rtemis::`[[`.CheckData


# %% repr.CheckData ----
#' Repr method for CheckData
#'
#' @param x CheckData object.
#'
#' @return Character: String representation of CheckData object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(repr, CheckData) <- function(
  x,
  name = NULL,
  check_integers = FALSE,
  pad = 0L,
  output_type = NULL
) {
  out <- repr_S7name(x)
  if (is.null(name)) {
    name <- x[["name"]]
    if (is.null(name)) name <- deparse(substitute(x))
  }
  n_rows <- x[["n_rows"]]
  n_cols <- x[["n_cols"]]
  n_numeric <- x[["n_numeric"]]
  n_integer <- x[["n_integer"]]
  n_character <- x[["n_character"]]
  n_factor <- x[["n_factor"]]
  n_ordered <- x[["n_ordered"]]
  n_date <- x[["n_date"]]
  n_constant <- x[["n_constant"]]
  n_duplicates <- x[["n_duplicates"]]
  n_cols_anyna <- x[["n_cols_anyna"]]
  n_na <- x[["n_na"]]
  n_na_last_col <- x[["n_na_last_col"]]

  ## Object class and dimensions ----
  out <- paste0(
    "  ",
    highlight(name, pad = pad, output_type = output_type),
    paste(
      ": A",
      x[["object_class"]],
      "with",
      highlight(
        format(n_rows, trim = TRUE, big.mark = ",", scientific = FALSE),
        pad = pad,
        output_type = output_type
      ),
      ngettext(n_rows, "row", "rows"),
      "and",
      highlight(
        format(n_cols, trim = TRUE, big.mark = ",", scientific = FALSE),
        pad = pad,
        output_type = output_type
      ),
      ngettext(n_cols, "column.", "columns.")
    )
  )

  ## Data Types ----
  isOrdered <- if (n_factor == 1) {
    paste(", which", ngettext(n_ordered, "is", "is not"), "ordered")
  } else if (n_factor > 1) {
    paste(
      ", of which",
      fmt(n_ordered, bold = TRUE, pad = pad, output_type = output_type),
      ngettext(n_ordered, "is", "are"),
      "ordered"
    )
  } else {
    ""
  }
  out <- paste(
    out,
    fmt("\n  Data types", bold = TRUE, pad = pad, output_type = output_type),
    paste(
      "  *",
      fmt(n_numeric, bold = TRUE, pad = pad, output_type = output_type),
      "numeric",
      ngettext(n_numeric, "feature", "features")
    ),
    paste(
      "  *",
      fmt(n_integer, bold = TRUE, pad = pad, output_type = output_type),
      "integer",
      ngettext(n_integer, "feature", "features")
    ),
    paste0(
      "  * ",
      fmt(n_factor, bold = TRUE, pad = pad, output_type = output_type),
      ngettext(n_factor, " factor", " factors"),
      isOrdered
    ),
    paste(
      "  *",
      fmt(n_character, bold = TRUE, pad = pad, output_type = output_type),
      "character",
      ngettext(n_character, "feature", "features")
    ),
    paste(
      "  *",
      fmt(n_date, bold = TRUE, pad = pad, output_type = output_type),
      "date",
      ngettext(n_date, "feature", "features")
    ),
    sep = "\n"
  )

  ## Issues ----
  out <- paste(
    out,
    fmt("\n  Issues", bold = TRUE, pad = pad, output_type = output_type),
    sep = "\n"
  )
  out <- paste(
    out,
    paste(
      "  *",
      fmt(
        n_constant,
        col = if (n_constant > 0) rt_red else NULL,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      "constant",
      ngettext(n_constant, "feature", "features")
    ),
    sep = "\n"
  )

  out <- paste(
    out,
    paste(
      "  *",
      fmt(
        n_duplicates,
        col = if (n_duplicates > 0) rt_orange else NULL,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      "duplicate",
      ngettext(n_duplicates, "case", "cases")
    ),
    sep = "\n"
  )

  nas <- if (n_cols_anyna > 0) {
    classes_na <- x[["classes_na"]]
    .col <- if (n_cols_anyna > 0) rt_orange else NULL
    out_nas <- paste(
      fmt(
        n_cols_anyna,
        col = .col,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      ngettext(n_cols_anyna, "feature includes", "features include"),
      "'NA' values;",
      fmt(n_na, col = .col, bold = TRUE, pad = pad, output_type = output_type),
      "'NA'",
      ngettext(n_na, "value", "values"),
      "total\n    *",
      paste0(
        mapply(
          function(val, name) {
            paste(
              fmt(
                val,
                col = .col,
                bold = TRUE,
                pad = pad,
                output_type = output_type
              ),
              tolower(name)
            )
          },
          classes_na,
          names(classes_na)
        ),
        collapse = "; "
      )
    )
    if (n_na_last_col > 0) {
      out_nas <- paste(
        out_nas,
        paste0(
          "\n    * ",
          fmt(
            n_na_last_col,
            col = .col,
            bold = TRUE,
            pad = pad,
            output_type = output_type
          ),
          ngettext(n_na_last_col, " missing value", " missing values"),
          " in the last column"
        )
      )
    }
    out_nas
  } else {
    paste(
      fmt("0", bold = TRUE, pad = pad, output_type = output_type),
      "missing values"
    )
  }
  out <- paste0(out, "\n  * ", nas)

  ## Recommendations ----
  out <- paste(
    out,
    fmt(
      "\n  Recommendations",
      bold = TRUE,
      pad = pad,
      output_type = output_type
    ),
    sep = "\n"
  )

  if (sum(n_character, n_constant, n_duplicates, n_cols_anyna) > 0) {
    if (n_character > 0) {
      out <- paste(
        out,
        fmt(
          "  * Consider converting character features to factors or excluding them.",
          col = rt_orange,
          bold = TRUE,
          pad = pad,
          output_type = output_type
        ),
        sep = "\n"
      )
    }
    if (n_constant > 0) {
      out <- paste(
        out,
        fmt(
          (paste(
            "  * Remove the constant",
            ngettext(n_constant, "feature.", "features.")
          )),
          col = rt_red,
          bold = TRUE,
          pad = pad,
          output_type = output_type
        ),
        sep = "\n"
      )
    }

    if (n_duplicates > 0) {
      out <- paste(
        out,
        fmt(
          paste(
            "  * Consider removing the duplicate",
            ngettext(n_duplicates, "case.", "cases.")
          ),
          col = rt_orange,
          bold = TRUE,
          pad = pad,
          output_type = output_type
        ),
        sep = "\n"
      )
    }

    if (n_cols_anyna > 0) {
      out <- paste(
        out,
        fmt(
          paste(
            "  * Consider using algorithms that can handle missingness or imputing missing values."
          ),
          col = rt_blue,
          bold = TRUE,
          pad = pad,
          output_type = output_type
        ),
        sep = "\n"
      )
      # Note regarding missing values in last column
      if (n_na_last_col > 0) {
        out <- paste(
          out,
          fmt(
            "\n  * Filter cases with missing values in the last column if using dataset for supervised learning.\n",
            col = rt_orange,
            bold = TRUE,
            pad = pad,
            output_type = output_type
          )
        )
      }
    }

    if (check_integers && n_integer > 0) {
      out <- paste(
        out,
        paste0(
          "  * Check the",
          ifelse(n_integer > 1, paste("", n_integer, ""), " "),
          "integer",
          ngettext(n_integer, " feature", " features"),
          " and consider if",
          ngettext(n_integer, " it", " they"),
          " should be converted to ",
          ngettext(n_integer, "factor", "factors")
        ),
        sep = "\n"
      )
    }
  } else {
    out <- paste(
      out,
      fmt(
        "  * Everything looks good",
        col = rt_green,
        bold = TRUE,
        pad = pad,
        output_type = output_type
      ),
      sep = "\n"
    )
  }
  paste0(out, "\n")
} # /rtemis::repr.CheckData


# %% print.CheckData ----
method(print, CheckData) <- function(
  x,
  name = NULL,
  check_integers = FALSE,
  output_type = NULL,
  ...
) {
  cat(repr(
    x,
    name = name,
    check_integers = check_integers,
    output_type = output_type
  ))
  invisible(x)
} # /rtemis::print.CheckData


================================================
FILE: R/16_S7utils.R
================================================
# S7_utils
# ::rtemis::
# 2025 EDG rtemis.org

# %% SuperWorkers ----
#' @keywords internal
#' @noRd
SuperWorkers <- new_class(
  name = "SuperWorkers",
  properties = list(
    algorithm = class_character,
    max_workers = class_integer,
    max_workers_algorithm = class_integer,
    max_workers_tuning = class_integer,
    max_workers_resampling = class_integer
  ),
  constructor = function(
    algorithm,
    max_workers,
    max_workers_algorithm,
    max_workers_tuning,
    max_workers_resampling
  ) {
    max_workers <- clean_posint(max_workers)
    max_workers_algorithm <- clean_posint(max_workers_algorithm)
    max_workers_tuning <- clean_posint(max_workers_tuning)
    max_workers_resampling <- clean_posint(max_workers_resampling)
    # Validate input
    if (
      max_workers_algorithm + max_workers_tuning + max_workers_resampling >
        max_workers
    ) {
      cli::cli_abort(
        "Total workers for algorithm, tuning, and resampling cannot exceed max_workers."
      )
    }
    new_object(
      S7_object(),
      algorithm = algorithm,
      max_workers = max_workers,
      max_workers_algorithm = max_workers_algorithm,
      max_workers_tuning = max_workers_tuning,
      max_workers_resampling = max_workers_resampling
    )
  }
) # /rtemis::SuperWorkers


# %% BiasVariance ----
BiasVariance <- new_class(
  name = "BiasVariance",
  properties = list(
    bias_squared = class_numeric,
    mean_bias_squared = class_numeric,
    sd_bias_squared = class_numeric,
    variance = class_numeric,
    mean_variance = class_numeric,
    sd_variance = class_numeric
  )
)

# %% `[[`.BiasVariance ----
# Make BiasVariance props `[[`- accessible ----
method(`[[`, BiasVariance) <- function(x, name) {
  prop(x, name)
}


# %% repr.BiasVariance ----
method(repr, BiasVariance) <- function(
  x,
  pad = 0L,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)
  paste0(
    repr_S7name("BiasVariance"),
    "Mean squared bias: ",
    highlight(ddSci(x[["mean_bias_squared"]]), output_type = output_type),
    " (",
    ddSci(x[["sd_bias_squared"]]),
    ")\n",
    "Mean variance: ",
    highlight(
      ddSci(x[["mean_variance"]]),
      output_type = output_type
    ),
    " (",
    ddSci(x[["sd_variance"]]),
    ")\n"
  )
} # /rtemis::repr.BiasVariance


# %% print.BiasVariance ----
#' Print method for BiasVariance
#'
#' @param x BiasVariance object.
#' @param ... Not used.
#'
#' @return `x`, invisibly.
#'
#' @author EDG
#' @noRd
method(print, BiasVariance) <- function(x, ...) {
  cat(repr(x))
  invisible(x)
} # /rtemis::print.BiasVariance


================================================
FILE: R/algorithmDB.R
================================================
# algorithmDB.R
# ::rtemis::
# 2025 EDG rtemis.org

# Supervised Learning ----
supervised_algorithms <- data.frame(rbind(
  c("CART", "Classification and Regression Trees", TRUE, TRUE, TRUE),
  c("GAM", "Generalized Additive Model", TRUE, TRUE, FALSE),
  c("GLM", "Generalized Linear Model", TRUE, TRUE, FALSE),
  c("GLMNET", "Elastic Net", TRUE, TRUE, TRUE),
  c("Isotonic", "Isotonic Regression", TRUE, TRUE, FALSE),
  c("LightCART", "Decision Tree", TRUE, TRUE, FALSE),
  c("LightGBM", "Gradient Boosting", TRUE, TRUE, FALSE),
  c("LightRF", "LightGBM Random Forest", TRUE, TRUE, FALSE),
  c("LightRuleFit", "LightGBM RuleFit", TRUE, TRUE, FALSE),
  c("Ranger", "Random Forest", TRUE, TRUE, FALSE),
  c(
    "LinearSVM",
    "Support Vector Machine with Linear Kernel",
    TRUE,
    TRUE,
    FALSE
  ),
  c(
    "RadialSVM",
    "Support Vector Machine with Radial Kernel",
    TRUE,
    TRUE,
    FALSE
  ),
  c("TabNet", "Attentive Interpretable Tabular Learning", TRUE, TRUE, FALSE)
))
colnames(supervised_algorithms) <- c(
  "Name",
  "Description",
  "Class",
  "Reg",
  "Surv"
)

supervised_multiclass <- c(
  "GLMNET",
  "CART",
  "LightCART",
  "LightRF",
  "LightGBM",
  "LinearSVM",
  "RadialSVM",
  "Ranger"
)

get_alg_name <- function(algorithm) {
  algname <- supervised_algorithms[, 1][
    tolower(algorithm) == tolower(supervised_algorithms[, 1])
  ]
  if (length(algname) == 0) {
    cli::cli_abort(algorithm, "Incorrect algorithm specified")
  }
  algname
}

get_alg_setup <- function(algorithm) {
  paste0("setup_", get_alg_name(algorithm))
}

#' Get algorithm description
#'
#' @param algorithm Character: Algorithm name.
#'
#' @return Character: Algorithm description.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
desc_alg <- function(algorithm) {
  algdesc <- supervised_algorithms[, 2][
    tolower(algorithm) == tolower(supervised_algorithms[, 1])
  ]
  if (length(algdesc) == 0) {
    cli::cli_abort(algorithm, "Incorrect algorithm specified")
  }
  algdesc
} # /rtemis::desc_alg

#' Algorithm description with short name
#'
#' @param algorithm Character: Algorithm name.
#'
#' @return Character: Algorithm description with short name in parentheses.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
desc_abb_alg <- function(algorithm) {
  paste0(
    desc_alg(algorithm),
    " (",
    get_alg_name(algorithm),
    ")"
  )
} # /rtemis::desc_abb_alg

get_train_fn <- function(algorithm) {
  paste0("train_", get_alg_name(algorithm))
} # /rtemis::get_train_fn

get_default_hyperparameters <- function(algorithm, type, ncols) {
  alg_name <- get_alg_name(algorithm)
  if (alg_name == "LightRF") {
    setup_LightRF(
      feature_fraction = if (type == "Classification") {
        sqrt(ncols) / ncols
      } else {
        0.33
      }
    )
  } else {
    do.call(paste0("setup_", get_alg_name(algorithm)), list())
  }
} # /rtemis::get_default_hyperparameters


# use e.g. in draw_scatter
setup_alg <- function(algorithm, ...) {
  alg_name <- get_alg_name(algorithm)
  setup_fn <- get_alg_setup(algorithm)
  do_call(setup_fn, list(...))
} # /rtemis::setup_alg

# Clustering ----
clust_algorithms <- data.frame(rbind(
  c("CMeans", "Fuzzy C-means Clustering"),
  c("DBSCAN", "Density-based spatial clustering of applications with noise"),
  # c("EMC", "Expectation Maximization Clustering"),
  c("HardCL", "Hard Competitive Learning"),
  # c("HOPACH", "Hierarchical Ordered Partitioning And Collapsing Hybrid"),
  # c("H2OKMeans", "H2O K-Means Clustering"),
  c("KMeans", "K-Means Clustering"),
  # c("MeanShift", "Mean Shift Clustering"),
  c("NeuralGas", "Neural Gas Clustering")
  # c("PAM", "Partitioning Around Medoids"),
  # c("PAMK", "Partitioning Around Medoids with k estimation"),
  # c("SPEC", "Spectral Clustering")
))

get_clust_name <- function(algorithm) {
  clustname <- clust_algorithms[, 1][
    tolower(algorithm) == tolower(clust_algorithms[, 1])
  ]
  if (length(clustname) == 0) {
    cli::cli_abort(algorithm, "Incorrect clustering algorithm specified")
  }
  clustname
} # /rtemis::get_clust_name

get_clust_desc <- function(algorithm) {
  clustdesc <- clust_algorithms[, 2][
    tolower(algorithm) == tolower(clust_algorithms[, 1])
  ]
  if (length(clustdesc) == 0) {
    cli::cli_abort(algorithm, "Incorrect clustering algorithm specified")
  }
  clustdesc
} # /rtemis::get_clust_desc

get_clust_fn <- function(algorithm) {
  paste0("cluster_", get_clust_name(algorithm))
} # /rtemis::get_clust_fn

get_default_clusterparams <- function(algorithm) {
  do.call(paste0("setup_", get_clust_name(algorithm)), list())
}

get_clustpredict_fn <- function(algorithm) {
  paste0("clustpredict_", get_clust_name(algorithm))
}

get_clust_setup_fn <- function(algorithm) {
  paste0("setup_", get_clust_name(algorithm))
} # /rtemis::get_clust_setup_fn


# Decomposition ----
decom_algorithms <- data.frame(rbind(
  # c("H2OAE", "H2O Autoencoder"),
  # c("H2OGLRM", "H2O Generalized Low-Rank Model"),
  c("ICA", "Independent Component Analysis"),
  c("Isomap", "Isomap"),
  # c("KPCA", "Kernel Principal Component Analysis"),
  # c("LLE", "Locally Linear Embedding"),
  # c("MDS", "Multidimensional Scaling"),
  c("NMF", "Non-negative Matrix Factorization"),
  c("PCA", "Principal Component Analysis"),
  # c("SPCA", "Sparse Principal Component Analysis"),
  # c("SVD", "Singular Value Decomposition"),
  c("tSNE", "t-distributed Stochastic Neighbor Embedding"),
  c("UMAP", "Uniform Manifold Approximation and Projection")
))

get_decom_name <- function(algorithm) {
  decomname <- decom_algorithms[, 1][
    tolower(algorithm) == tolower(decom_algorithms[, 1])
  ]
  if (length(decomname) == 0) {
    cli::cli_abort(algorithm, "Incorrect decomposition algorithm specified")
  }
  decomname
} # /rtemis::get_decom_name

get_decom_desc <- function(algorithm) {
  decomdesc <- decom_algorithms[, 2][
    tolower(algorithm) == tolower(decom_algorithms[, 1])
  ]
  if (length(decomdesc) == 0) {
    cli::cli_abort(algorithm, "Incorrect decomposition algorithm specified")
  }
  decomdesc
} # /rtemis::get_decom_desc

get_decom_fn <- function(algorithm) {
  paste0("decom_", get_decom_name(algorithm))
} # /rtemis::get_decom_fn

get_default_decomparams <- function(algorithm) {
  do.call(paste0("setup_", get_decom_name(algorithm)), list())
} # /rtemis::get_default_decomparams

get_decom_setup_fn <- function(algorithm) {
  paste0("setup_", get_decom_name(algorithm))
} # /rtemis::get_decom_setup_fn

get_decom_predict_fn <- function(algorithm) {
  paste0("predict_", get_decom_name(algorithm))
} # /rtemis::get_decom_predict_fn


#' Available Algorithms
#'
#' Print available algorithms for supervised learning, clustering, and decomposition.
#'
#' @rdname available_algorithms
#' @aliases available_algorithms
#'
#' @param verbosity Integer: Verbosity level.
#' @return Named list of algorithm descriptions, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' available_supervised()
available_supervised <- function(verbosity = 1L) {
  algs <- structure(
    supervised_algorithms[, 2],
    names = supervised_algorithms[, 1],
    class = "list"
  )
  if (verbosity > 0L) {
    printls(algs, print_class = FALSE, limit = -1L)
  }
  invisible(algs)
}

#' @rdname available_algorithms
#' @export
#'
#' @examples
#' available_clustering()
available_clustering <- function(verbosity = 1L) {
  algs <- structure(
    clust_algorithms[, 2],
    names = clust_algorithms[, 1],
    class = "list"
  )
  if (verbosity > 0L) {
    printls(algs, print_class = FALSE, limit = -1L)
  }
  invisible(algs)
}


#' @rdname available_algorithms
#' @export
#'
#' @examples
#' available_decomposition()
available_decomposition <- function(verbosity = 1L) {
  algs <- structure(
    decom_algorithms[, 2],
    names = decom_algorithms[, 1],
    class = "list"
  )
  if (verbosity > 0L) {
    printls(algs, print_class = FALSE, limit = -1L)
  }
  invisible(algs)
}

# Draw ----
draw_fns <- data.frame(
  rbind(
    c("draw_3DScatter", "3D Scatter Plot"),
    c("draw_bar", "Bar Plot"),
    c("draw_box", "Box Plot"),
    c("draw_calibration", "Calibration Plot"),
    c("draw_confusion", "Confusion Matrix"),
    c("draw_dist", "Density and Histogram Plots"),
    c("draw_fit", "Scatter Plot with Fit Line alias"),
    c("draw_graphD3", "Network Graph using networkD3"),
    c("draw_graphjs", "Network Graph using graphjs"),
    c("draw_heat", "Heatmap using plotly"),
    c("draw_heatmap", "Heatmap using heatmaply"),
    c("draw_leafleat", "Choropleth Map using leaflet"),
    c("draw_pie", "Pie Chart"),
    c("draw_protein", "Amino Acid Annotation Plot"),
    c("draw_roc", "ROC Curve"),
    c("draw_scatter", "Scatter Plot"),
    c("draw_spectrogram", "Spectrogram"),
    c("draw_table", "Table using plotly"),
    c("draw_ts", "Time Series Plot"),
    c("draw_varimp", "Barplot for Variable Importance alias"),
    c("draw_volcano", "Volcano Plot"),
    c("draw_xt", "Time Series Line Plot")
  )
)
colnames(draw_fns) <- c("Function Name", "Description")


#' Available Draw Functions
#'
#' Print available draw functions for visualization.
#'
#' @param verbosity Integer: Verbosity level.
#'
#' @return Named list of draw function descriptions, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' available_draw()
available_draw <- function(verbosity = 1L) {
  fns <- structure(
    draw_fns[, 2],
    names = draw_fns[, 1],
    class = "list"
  )
  if (verbosity > 0L) {
    cat("Available draw functions:\n")
    printls(fns, print_class = FALSE, limit = -1L)
  }
  invisible(fns)
} # /rtemis::available_draw


================================================
FILE: R/calibrate.R
================================================
# calibrate.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% calibrate.Classification ----
#' Calibrate Binary Classification Models
#'
#' @description
#' The goal of calibration is to adjust the predicted probabilities of a binary classification
#' model so that they better reflect the true probabilities (i.e. empirical risk) of the positive
#' class.
#'
#' @details
#' Important: The calibration model's training data should be different from the classification
#' model's training data.
#'
#' @param x `Classification` object.
#' @param predicted_probabilities Numeric vector: Predicted probabilities.
#' @param true_labels Factor: True class labels.
#' @param algorithm Character: Algorithm to use to train calibration model.
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param verbosity Integer: Verbosity level.
#' @param ... Not used
#'
#' @return `CalibratedClassification` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' datc2 <- data.frame(
#'   gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)),
#'   iris[51:150, ]
#' )
#' res <- resample(datc2)
#' datc2$Species <- factor(datc2$Species)
#' datc2_train <- datc2[res[[1]], ]
#' datc2_test <- datc2[-res[[1]], ]
#' mod_c_glm <- train(
#'   x = datc2_train,
#'   dat_test = datc2_test,
#'   algorithm = "glm"
#' )
#' mod_c_glm_cal <- calibrate(
#'   mod_c_glm,
#'   predicted_probabilities = mod_c_glm$predicted_prob_training,
#'   true_labels = mod_c_glm$y_training
#' )
#' mod_c_glm_cal
method(calibrate, Classification) <- function(
  x,
  predicted_probabilities,
  true_labels,
  algorithm = "isotonic",
  hyperparameters = NULL,
  verbosity = 1L,
  ...
) {
  # Check inputs
  check_float01inc(predicted_probabilities)
  check_inherits(true_labels, "factor")

  # Training data is whatever is passed by user
  dat <- data.table(predicted_probabilities, true_labels)
  # Test data is taken from mod, if available
  if (!is.null(x@y_test) && !is.null(x@predicted_prob_test)) {
    dat_test <- data.table(
      predicted_probabilities = x@predicted_prob_test,
      true_labels = x@y_test
    )
  } else {
    dat_test <- NULL
  }
  # Calibration model
  if (verbosity > 0L) {
    msg(
      fmt("<>", col = col_calibrator, bold = TRUE),
      "Calibrating",
      x@algorithm,
      "classification..."
    )
  }
  cal_model <- train(
    dat,
    dat_test = dat_test,
    algorithm = algorithm,
    hyperparameters = hyperparameters,
    verbosity = verbosity
  )

  mod_cal <- CalibratedClassification(x, cal_model)
  if (verbosity > 0L) {
    message()
    print(mod_cal)
    message()
  }
  if (verbosity > 0L) {
    msg(fmt("</>", col = col_calibrator, bold = TRUE), "Calibration done.")
  }
  mod_cal
} # /rtemis::calibrate


# %% calibrate.ClassificationRes ----
#' Calibrate Resampled Classification Models
#'
#' @param x `ClassificationRes` object.
#' @param algorithm Character: Algorithm to use to train calibration model.
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param resampler_config `ResamplerConfig` object: Configuration for resampling during calibration model training.
#' @param train_verbosity Integer: Verbosity level for training calibration models.
#' @param verbosity Integer: Verbosity level.
#' @param ... Not used
#'
#' @return `CalibratedClassificationRes` object.
#' @author EDG
#' @keywords internal
#' @noRd
method(calibrate, ClassificationRes) <- function(
  x,
  algorithm = "isotonic",
  hyperparameters = NULL,
  resampler_config = setup_Resampler(
    n_resamples = 5L,
    type = "KFold"
  ),
  train_verbosity = 0L,
  verbosity = 1L,
  ...
) {
  # Check inputs
  check_inherits(algorithm, "character")
  check_is_S7(resampler_config, ResamplerConfig)
  verbosity <- clean_int(verbosity)

  # Check IFW is FALSE
  if (!is.null(hyperparameters) && hyperparameters[["ifw"]]) {
    cli::cli_abort("IFW must be FALSE for proper calibration.")
  }

  # Calibration models
  if (verbosity > 0L) {
    msg(
      fmt("<>", col = col_calibrator, bold = TRUE),
      "Calibrating",
      x@algorithm,
      "resampled classification..."
    )
  }
  calmods <- lapply(
    x@models,
    function(mod) {
      dat <- data.table(
        predicted_probabilities = mod@predicted_prob_test,
        true_labels = mod@y_test
      )
      train(
        dat,
        algorithm = algorithm,
        hyperparameters = hyperparameters,
        outer_resampling_config = resampler_config,
        verbosity = train_verbosity
      )
    }
  )
  names(calmods) <- names(x@models)

  # CalibratedClassificationRes
  modres_cal <- CalibratedClassificationRes(x, calmods)

  # Outro ----
  if (verbosity > 0L) {
    message()
    print(modres_cal)
    message()
  }
  if (verbosity > 0L) {
    msg(fmt("</>", col = col_calibrator, bold = TRUE), "Calibration done.")
  }
  modres_cal
} # /rtemis::calibrate.ClassificationRes


================================================
FILE: R/check_data.R
================================================
# check_data.R
# ::rtemis::
# 2022- EDG rtemis.org

# %% check_data ----
#' Check Data
#'
#' @param x tabular data: Input to be checked.
#' @param name Character: Name of dataset.
#' @param get_duplicates Logical: If TRUE, check for duplicate cases.
#' @param get_na_case_pct Logical: If TRUE, calculate percent of NA values per
#' case.
#' @param get_na_feature_pct Logical: If TRUE, calculate percent of NA values
#' per feature.
#'
#' @return `CheckData` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' n <- 1000
#' x <- rnormmat(n, 50, return_df = TRUE)
#' x$char1 <- sample(letters, n, TRUE)
#' x$char2 <- sample(letters, n, TRUE)
#' x$fct <- factor(sample(letters, n, TRUE))
#' x <- rbind(x, x[1, ])
#' x$const <- 99L
#' x[sample(nrow(x), 20), 3] <- NA
#' x[sample(nrow(x), 20), 10] <- NA
#' x$fct[30:35] <- NA
#' check_data(x)
check_data <- function(
  x,
  name = NULL,
  get_duplicates = TRUE,
  get_na_case_pct = FALSE,
  get_na_feature_pct = FALSE
) {
  if (is.null(name)) {
    name <- deparse(substitute(x))
  }
  # Check is tabular
  check_tabular(x)
  # Get class of x before converting to data.table
  object_class <- class(x)[1]
  # Convert to data.table
  x <- as.data.table(x)
  n_rows <- NROW(x)
  n_cols <- NCOL(x)

  # Data Types ----
  classes <- sapply(x, \(v) class(v)[1])
  counts <- table(classes)

  ## Numeric ----
  n_numeric <- max0(counts["numeric"])

  ## Integers ----
  n_integer <- max0(counts["integer"])

  ## Characters ----
  n_character <- max0(counts["character"])

  ## Factors ----
  index_factor <- which(sapply(x, is.factor))
  n_factor <- length(index_factor)
  index_ordered <- which(sapply(x, is.ordered))
  n_ordered <- length(index_ordered)

  ## Dates ----
  n_date <- sum(
    max0(counts["Date"]),
    max0(counts["IDate"]),
    max0(counts["POSIXct"]),
    max0(counts["POSIXlt"])
  )

  # Issues ----

  ## Constants ----
  index_constant <- which(sapply(x, is_constant))
  n_constant <- length(index_constant)

  ## Duplicates ----
  n_duplicates <- if (get_duplicates) {
    n_rows - uniqueN(x)
  } else {
    NA
  }

  ## NAs ----
  cols_anyna <- which(sapply(x, anyNA))
  n_cols_anyna <- length(cols_anyna)
  index_na <- which(is.na(x))
  n_na <- length(index_na)

  ## Get percent of NA values per feature and per case
  if (n_cols_anyna > 0) {
    na_feature_pct <- if (get_na_feature_pct) {
      data.frame(
        Feature = names(cols_anyna),
        Pct_NA = sapply(seq_len(n_cols_anyna), \(i) {
          sum(is.na(x[[cols_anyna[i]]])) / n_rows
        })
      )
    } else {
      NULL
    }

    index_incomplete <- which(!complete.cases(x))
    n_incomplete <- length(index_incomplete)

    na_case_pct <- if (get_na_case_pct) {
      data.frame(
        Case = index_incomplete,
        Pct_NA = sapply(seq_len(n_incomplete), \(i) {
          sum(is.na(x[index_incomplete[i], ])) / n_cols
        })
      )
    } else {
      NULL
    }

    # Get types of features with NA
    classes_na <- table(classes[cols_anyna])

    # Get N of NAs in last column
    n_na_last_col <- sum(is.na(x[[n_cols]]))
  } else {
    n_na_last_col <- 0
    classes_na <- NULL
    na_feature_pct <- if (get_na_feature_pct) {
      data.frame(
        Feature = character(0),
        Pct_NA = double(0)
      )
    } else {
      NULL
    }
    na_case_pct <- if (get_na_case_pct) {
      data.frame(
        Case = integer(0),
        Pct_NA = double(0)
      )
    } else {
      NULL
    }
  }

  # CheckData ----
  CheckData(
    object_class = object_class,
    name = name,
    n_rows = n_rows,
    n_cols = n_cols,
    n_numeric = n_numeric,
    n_integer = n_integer,
    n_character = n_character,
    n_factor = n_factor,
    n_ordered = n_ordered,
    n_date = n_date,
    n_constant = n_constant,
    n_duplicates = n_duplicates,
    n_cols_anyna = n_cols_anyna,
    n_na = n_na,
    classes_na = classes_na,
    na_feature_pct = na_feature_pct,
    na_case_pct = na_case_pct,
    n_na_last_col = n_na_last_col
  )
} # /rtemis::check_data


# %% max0 ----
#' Helper function to get max or 0
#'
#' @param x Numeric vector
#'
#' @return Numeric: max(x, 0)
#'
#' @author EDG
#' @keywords internal
#' @noRd
max0 <- function(x) max(x, 0, na.rm = TRUE)


# %% to_html.CheckData ----
#' Generate `CheckData` object description in HTML
#'
#' @param x `CheckData` object
#' @param name Character: Name of the data set
#' @param css List: CSS styles
#'
#' @return `shiny.tag` object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(to_html, CheckData) <- function(
  x,
  name = NULL,
  css = list(
    font_family = "Helvetica",
    color = "#fff",
    background_color = "#242424"
  )
) {
  n_rows <- x[["n_rows"]]
  n_cols <- x[["n_cols"]]
  n_numeric <- x[["n_numeric"]]
  n_integer <- x[["n_integer"]]
  n_character <- x[["n_character"]]
  n_factor <- x[["n_factor"]]
  n_ordered <- x[["n_ordered"]]
  n_date <- x[["n_date"]]
  n_constant <- x[["n_constant"]]
  n_duplicates <- x[["n_duplicates"]]
  n_cols_anyna <- x[["n_cols_anyna"]]
  n_na <- x[["n_na"]]
  classes_na <- x[["classes_na"]]

  ## Data Types ----
  numeric <- HTML(paste(
    strong(n_numeric),
    "numeric",
    ngettext(n_numeric, "feature", "features")
  ))
  integer <- HTML(paste(
    strong(n_integer),
    "integer",
    ngettext(n_integer, "feature", "features")
  ))
  categorical <- HTML(paste0(
    strong(n_factor),
    ngettext(n_factor, " factor", " factors"),
    if (n_factor == 1) {
      paste(", which", ngettext(n_ordered, "is", "is not"), "ordered")
    } else if (n_factor > 1) {
      paste(
        ", of which",
        strong(n_ordered),
        ngettext(n_ordered, "is", "are"),
        "ordered"
      )
    }
  ))
  # .col <- if (n_character > 0) html_orange else strong
  .col <- strong
  characters <- HTML(paste(
    .col(n_character),
    "character",
    ngettext(n_character, "feature", "features")
  ))
  dates <- HTML(paste(
    strong(n_date),
    "date",
    ngettext(n_date, "feature", "features")
  ))

  ## Issues ----
  .col <- if (n_constant > 0) html_red else strong
  constants <- HTML(paste(
    .col(n_constant),
    "constant",
    ngettext(n_constant, "feature", "features")
  ))
  .col <- if (n_duplicates > 0) html_orange else strong
  duplicates <- HTML(paste(
    .col(n_duplicates),
    "duplicate",
    ngettext(n_duplicates, "case", "cases")
  ))

  .col <- if (n_cols_anyna > 0) html_orange else strong
  nas <- if (n_cols_anyna > 0) {
    HTML(paste(
      .col(n_cols_anyna),
      ngettext(n_cols_anyna, "feature includes", "features include"),
      "'NA' values; ",
      .col(n_na),
      "'NA'",
      ngettext(n_na, "value", "values"),
      "total",
      tags[["ul"]](
        lapply(seq_along(classes_na), \(i) {
          tags[["li"]](HTML(paste(
            .col(classes_na[i]),
            tolower(names(classes_na)[i])
            # ngettext(classes_na[i], "feature", "features")
          )))
        })
      )
    ))
  } else {
    HTML(paste(strong("0"), "missing values"))
  }

  ## Recs ----
  rec_constant <- if (n_constant > 0) {
    tags[["li"]](HTML(paste(html_orange(
      "Remove the constant",
      ngettext(n_constant, "feature", "features")
    ))))
  } else {
    NULL
  }

  rec_dups <- if (n_duplicates > 0) {
    tags[["li"]](HTML(paste(html_orange(
      "Consider removing the duplicate",
      ngettext(n_duplicates, "case", "cases")
    ))))
  } else {
    NULL
  }

  rec_na <- if (n_cols_anyna > 0) {
    list(
      if (isTRUE(classes_na["factor"] > 0)) {
        tags[["li"]](HTML(paste(html_orange(
          "Consider assigning factor 'NA' values to new 'missing' level"
        ))))
      },
      tags[["li"]](HTML(paste(html_orange(
        "Consider imputing missing values or using algorithms that can handle missing values"
      ))))
    )
  } else {
    NULL
  }

  recs <- if (sum(n_constant, n_duplicates, n_cols_anyna) == 0) {
    tags[["li"]](html_success("Everything looks good"))
  } else {
    list(
      rec_constant,
      rec_dups,
      rec_na
    )
  }
  ## out ----
  div(
    p(
      div(
        html_highlight(name),
        ": A",
        x[["class"]],
        "with",
        html_highlight(n_rows),
        ngettext(n_rows, "row", "rows"),
        "and",
        html_highlight(n_cols),
        ngettext(n_cols, "feature", "features"),
        class = "checkdata-header"
      )
    ),
    p(
      span(strong("Data types"), class = "sidelined"),
      tags[["ul"]](
        tags[["li"]](numeric),
        tags[["li"]](integer),
        tags[["li"]](categorical),
        tags[["li"]](characters),
        tags[["li"]](dates)
      )
    ), # p Data Types
    p(
      span(strong("Issues"), class = "sidelined"),
      tags[["ul"]](
        tags[["li"]](constants),
        tags[["li"]](duplicates),
        tags[["li"]](nas)
      )
    ), # p Issues
    p(
      span(strong("Recommendations"), class = "sidelined"),
      tags[["ul"]](
        recs
      )
    ), # p Recommendations
    class = "checkData",
    style = paste0(
      "font-family:",
      css[["font_family"]],
      "; color:",
      css[["color"]],
      "; background-color:",
      css[["background_color"]],
      ";"
    )
  )
} # /rtemis::to_html.CheckData


================================================
FILE: R/check_input_data.R
================================================
# check_supervised.R
# ::rtemis::
# EDG rtemis.org

# Notes:
# Some algorithms do not work with variable names containing dots (SparkML)

# %% check_factor_levels.class_data.frame ----
method(check_factor_levels, class_data.frame) <- function(x, y, z) {
  if (!is.null(y) || !is.null(z)) {
    index_factor <- which(sapply(x, is.factor))
    x_levels <- lapply(x[, index_factor, drop = FALSE], levels)
    if (!is.null(y)) {
      y_levels <- lapply(y[, index_factor, drop = FALSE], levels)
      if (
        !all(sapply(seq_along(x_levels), function(i) {
          identical(x_levels[[i]], y_levels[[i]])
        }))
      ) {
        cli::cli_abort(
          "Training and validation set factor levels do not match."
        )
      }
    }
    if (!is.null(z)) {
      z_levels <- lapply(z[, index_factor, drop = FALSE], levels)
      if (
        !all(sapply(seq_along(x_levels), function(i) {
          identical(x_levels[[i]], z_levels[[i]])
        }))
      ) {
        cli::cli_abort("Training and test set factor levels do not match.")
      }
    }
  }
  invisible()
} # /method(check_factor_levels, class_data.frame)

method(check_factor_levels, class_data.table) <- function(x, y, z) {
  if (!is.null(y) || !is.null(z)) {
    index_factor <- which(sapply(x, is.factor))
    x_levels <- lapply(x[, .SD, .SDcols = index_factor], levels)
    if (!is.null(y)) {
      y_levels <- lapply(y[, .SD, .SDcols = index_factor], levels)
      if (
        !all(sapply(seq_along(x_levels), function(i) {
          identical(x_levels[[i]], y_levels[[i]])
        }))
      ) {
        cli::cli_abort(
          "Training and validation set factor levels do not match."
        )
      }
    }
    if (!is.null(z)) {
      z_levels <- lapply(z[, .SD, .SDcols = index_factor], levels)
      if (
        !all(sapply(seq_along(x_levels), function(i) {
          identical(x_levels[[i]], z_levels[[i]])
        }))
      ) {
        cli::cli_abort("Training and test set factor levels do not match.")
      }
    }
  }
  invisible()
} # /method(check_factor_levels, class_data.table)

#' Check data ahead of supervised learning
#'
#' @param x Data frame: Training set features and outcome in the last column.
#' @param dat_validation Data frame: Validation set features and outcome in the last column.
#' @param dat_test Data frame: Test set features and outcome in the last column.
#' @param allow_missing Logical: If TRUE, allow missing values in the data.
#' @param verbosity Integer: Verbosity level.
#'
#' @return NULL, invisibly. Stops execution if checks fail.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' res <- resample(iris)
#' iris_train <- iris[res[[1]], ]
#' iris_test <- iris[-res[[1]], ]
#' check_supervised(iris_train, dat_test = iris_test)
check_supervised <- function(
  x,
  dat_validation = NULL,
  dat_test = NULL,
  allow_missing = TRUE,
  verbosity = 1L
) {
  # if (upsample && downsample) cli::cli_abort("Only one of upsample and downsample can be TRUE")

  if (verbosity > 0L) {
    msgstart("Checking data is ready for training...")
  }

  # Check types ----
  check_inherits(x, "data.frame")
  if (!is.null(dat_validation)) {
    check_inherits(dat_validation, "data.frame")
  }
  if (!is.null(dat_test)) {
    check_inherits(dat_test, "data.frame")
  }

  # Check dimensions ----
  ncols <- NCOL(x)
  # Since one column must be outcome, need min of 2 columns
  if (ncols < 2) {
    cli::cli_abort("Data must contain at least 1 feature and 1 outcome column.")
  }
  if (!is.null(dat_validation)) {
    if (NCOL(dat_validation) != ncols) {
      cli::cli_abort(
        "\nValidation set must contain same number of columns as training set."
      )
    }
  }
  if (!is.null(dat_test)) {
    if (NCOL(dat_test) != ncols) {
      cli::cli_abort(
        "Test set must contain same number of columns as training set."
      )
    }
  }

  # Missing values ----
  if (anyNA(outcome(x))) {
    cli::cli_abort("Training set outcome cannot contain any missing values.")
  }
  if (!allow_missing && anyNA(x)) {
    cli::cli_abort("Data should not contain missing values.")
  }

  # Outcome class ----
  outcome_class <- class(x[[ncols]])
  if (!outcome_class %in% c("integer", "numeric", "factor")) {
    cli::cli_abort("Outcome must be integer, numeric, or factor.")
  }
  if (!is.null(dat_validation)) {
    if (class(dat_validation[[ncols]]) != outcome_class) {
      cli::cli_abort("Training and validation outcome must be of same class.")
    }
  }
  if (!is.null(dat_test)) {
    if (class(dat_test[[ncols]]) != outcome_class) {
      cli::cli_abort("Training and test outcome must be of same class.")
    }
  }

  # Factor levels ----
  # Check that factors across training, validation, and test contain the same levels.
  check_factor_levels(x = x, y = dat_validation, z = dat_test)

  if (verbosity > 0L) {
    msgdone()
  }
  invisible()
} # /rtemis::check_supervised


# %% check_unsupervised_data ----
#' Check data ahead of unsupervised learning
#'
#' @param x Data frame: Features for unsupervised learning.
#' @param allow_missing Logical: If TRUE, allow missing values in the data. Default is FALSE.
#'
#' @return NULL, invisibly. Stops execution if checks fail.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' check_unsupervised_data(iris[, -5])
check_unsupervised_data <- function(x, allow_missing = FALSE, verbosity = 1L) {
  if (verbosity > 0L) {
    msgstart("Checking unsupervised data...")
  }
  if (NCOL(x) < 2) {
    cli::cli_abort("Data must contain at least 2 columns.")
  }
  if (any(sapply(x, function(x) !is.numeric(x)))) {
    cli::cli_abort("All columns must be numeric.")
  }
  if (!allow_missing && anyNA(x)) {
    cli::cli_abort("Data should not contain missing values.")
  }
  if (verbosity > 0L) {
    msgdone()
  }
  invisible()
} # /rtemis::check_unsupervised_data


================================================
FILE: R/cluster.R
================================================
# cluster.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% cluster ----
#' Perform Clustering
#'
#' Perform clustering on the rows (usually cases) of a dataset.
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' @param x Matrix or data.frame: Data to cluster. Rows are cases to be clustered.
#' @param algorithm Character: Clustering algorithm.
#' @param config List: Algorithm-specific config.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `Clustering` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' iris_km <- cluster(exc(iris, "Species"), algorithm = "KMeans")
cluster <- function(
  x,
  algorithm = "KMeans",
  config = NULL,
  verbosity = 1L
) {
  # Checks ----
  if (is.null(config)) {
    config <- get_default_clusterparams(algorithm)
  }
  check_is_S7(config, ClusteringConfig)

  # Intro ----
  start_time <- intro(verbosity = verbosity)

  # Data ----
  if (verbosity > 0L) {
    summarize_unsupervised(x)
  }

  # Cluster ----
  algorithm <- get_clust_name(algorithm)
  if (verbosity > 0L) {
    msg0(bold(paste0("Clustering with ", algorithm, "...")))
  }
  clust <- cluster_(config = config, x = x, verbosity = verbosity)

  # Clusters ----
  clusters <- do_call(
    fn = get_clustpredict_fn(algorithm),
    args = list(clust = clust)
  )

  if (!is.null(config[["k"]])) {
    # For algorithms where k is specified in config
    k <- config[["k"]]
  } else {
    # For algorithms where k is not prescribed, but determined from the clustering result
    k <- length(unique(clusters))
    if (verbosity > 0L) {
      msg0(paste0("Found ", highlight(k), " clusters."))
    }
  }

  # Outro ----
  outro(start_time, verbosity = verbosity)
  Clustering(
    algorithm = algorithm,
    clust = clust,
    k = k,
    clusters = clusters,
    config = config
  )
} # /rtemis::cluster


================================================
FILE: R/cluster_CMeans.R
================================================
# cluster_CMeans.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% cluster_.CMeansConfig ----
#' C-means Clustering
#'
#' @keywords internal
#' @noRd
method(cluster_, CMeansConfig) <- function(config, x, verbosity = 1L) {
  # Dependencies ----
  check_dependencies("e1071")

  # Data ----
  check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity)

  # Cluster ----
  if (verbosity > 0L) {
    msg("Clustering with", config@algorithm, "...")
  }
  clust <- e1071::cmeans(
    x = x,
    centers = config[["k"]],
    iter.max = config[["max_iter"]],
    verbose = verbosity > 0L,
    dist = config[["dist"]],
    method = config[["method"]],
    m = config[["m"]],
    rate.par = config[["rate_par"]],
    weights = config[["weights"]],
    control = config[["control"]]
  )
  check_inherits(clust, "fclust")
  clust
} # /rtemis::cluster_.CMeansConfig


# %% clustpredict_CMeans ----
clustpredict_CMeans <- function(clust) {
  check_inherits(clust, "fclust")
  clust[["cluster"]]
} # /rtemis::clustpredict_CMeans


================================================
FILE: R/cluster_DBSCAN.R
================================================
# cluster_DBSCAN.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% cluster_.DBSCANConfig ----
#' Density-based spatial clustering of applications with noise (DBSCAN)
#'
#' @keywords internal
#' @noRd
method(cluster_, DBSCANConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, DBSCANConfig)

  # Dependencies ----
  check_dependencies("dbscan")

  # Data ----
  check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity)

  # Cluster ----
  if (verbosity > 0L) {
    msg("Clustering with", config@algorithm, "...")
  }
  clust <- dbscan::dbscan(
    x = x,
    eps = config[["eps"]],
    minPts = config[["min_points"]],
    weights = config[["weights"]],
    borderPoints = config[["border_points"]],
    search = config[["search"]],
    bucketSize = config[["bucket_size"]],
    splitRule = config[["split_rule"]],
    approx = config[["approx"]]
  )
  check_inherits(clust, "dbscan")
  clust
} # /rtemis::cluster_.DBSCANConfig


# %% clustpredict_DBSCAN ----
clustpredict_DBSCAN <- function(clust, dat_train = NULL, newdata = NULL) {
  check_inherits(clust, "dbscan")
  if (is.null(newdata)) {
    return(clust[["cluster"]])
  } else {
    predict(clust, newdata = newdata, data = dat_train)
  }
} # /rtemis::clustpredict_DBSCAN


================================================
FILE: R/cluster_flexclust.R
================================================
# cluster_KMeans.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% cluster_.KMeansConfig ----
#' K-means Clustering
#'
#' @keywords internal
#' @noRd
method(cluster_, KMeansConfig) <- function(config, x, verbosity = 1L) {
  # Dependencies ----
  check_dependencies("flexclust")

  # Data ----
  check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity)

  # Cluster ----
  if (verbosity > 0L) {
    msg("Clustering with", config@algorithm, "...")
  }
  clust <- flexclust::cclust(
    x = x,
    k = config[["k"]],
    dist = config[["dist"]],
    method = "kmeans"
  )
  check_inherits(clust, "kcca")
  clust
} # /rtemis::cluster_.KMeansConfig


# %% cluster_.HardCLConfig ----
#' Hard Competitive Learning Clustering
#'
#' @keywords internal
#' @noRd
method(cluster_, HardCLConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, HardCLConfig)

  # Dependencies ----
  check_dependencies("flexclust")

  # Data ----
  check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity)

  # Cluster ----
  if (verbosity > 0L) {
    msg("Clustering with", config@algorithm, "...")
  }
  clust <- flexclust::cclust(
    x = x,
    k = config[["k"]],
    dist = config[["dist"]],
    method = "hardcl"
  )
  check_inherits(clust, "kcca")
  clust
} # /rtemis::cluster_.HardCLConfig


# %% cluster_.NeuralGasConfig ----
#' Neural Gas Clustering
#'
#' @keywords internal
#' @noRd
method(cluster_, NeuralGasConfig) <- function(config, x, verbosity = 1L) {
  # Dependencies ----
  check_dependencies("flexclust")

  # Data ----
  check_unsupervised_data(x = x, allow_missing = FALSE, verbosity = verbosity)

  # Cluster ----
  if (verbosity > 0L) {
    msg("Clustering with", config@algorithm, "...")
  }
  clust <- flexclust::cclust(
    x = x,
    k = config[["k"]],
    dist = config[["dist"]],
    method = "neuralgas"
  )
  check_inherits(clust, "kcca")
  clust
} # /rtemis::cluster_.NeuralGasConfig


# %% clustpredict_{KMeans,HardCL,NeuralGas} ----
#' clustpredict methods for KMeans, HardCL, NeuralGas
#'
#' @author EDG
#' @keywords internal
#' @noRd
clustpredict_KMeans <- clustpredict_HardCL <- clustpredict_NeuralGas <- function(
  clust,
  newdata = NULL
) {
  check_inherits(clust, "kcca")
  flexclust::clusters(clust, newdata = newdata)
} # /rtemis::clustpredict_{KMeans,HardCL,NeuralGas}


================================================
FILE: R/data_xt_example.R
================================================
#' Example longitudinal dataset
#'
#' A small synthetic dataset demonstrating various participation patterns
#' in longitudinal data, suitable for examples with \code{\link{xtdescribe}}.
#'
#' @format A data frame with 30 rows and 4 variables:
#' \describe{
#'   \item{patient_id}{Integer: Patient identifier (1-10).}
#'   \item{year}{Integer: Year of measurement (2020-2024).}
#'   \item{blood_pressure}{Numeric: Systolic blood pressure measurement.}
#'   \item{treatment}{Character: Treatment group ("A" or "B").}
#' }
#'
#' @details
#' This dataset includes 10 patients measured at up to 5 time points (years 2020-2024).
#' The dataset demonstrates various participation patterns typical in longitudinal studies:
#' \itemize{
#'   \item Complete participation (all time points)
#'   \item Early dropout
#'   \item Late entry
#'   \item Intermittent participation
#'   \item Single time point participation
#' }
#'
#' @examples
#' data(xt_example)
#' head(xt_example)
#' summary(xt_example)
#'
#' @keywords datasets
"xt_example"


================================================
FILE: R/ddSci.R
================================================
# ddSci.R
# ::rtemis::
# 2015- EDG rtemis.org

#' Format Numbers for Printing
#'
#' 2 Decimal places, otherwise scientific notation
#'
#' Numbers will be formatted to 2 decimal places, unless this results in 0.00 (e.g. if input was .0032),
#' in which case they will be converted to scientific notation with 2 significant figures.
#' `ddSci` will return `0.00` if the input is exactly zero.
#' This function can be used to format numbers in plots, on the console, in logs, etc.
#'
#' @param x Vector of numbers
#' @param decimal_places Integer: Return this many decimal places.
#' @param hi Float: Threshold at or above which scientific notation is used.
#' @param as_numeric Logical: If TRUE, convert to numeric before returning.
#' This will not force all numbers to print 2 decimal places. For example:
#' 1.2035 becomes "1.20" if `as_numeric = FALSE`, but 1.2 otherwise
#' This can be helpful if you want to be able to use the output as numbers / not just for printing.
#' @return Formatted number
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- .34876549
#' ddSci(x)
#' # "0.35"
#' x <- .00000000457823
#' ddSci(x)
#' # "4.6e-09"
ddSci <- function(x, decimal_places = 2, hi = 1e06, as_numeric = FALSE) {
  if (is.null(x)) {
    if (as_numeric) {
      return(NULL)
    } else {
      return("NULL")
    }
  }
  # Do not format factors, characters, or integers.
  if (is.factor(x) || is.character(x)) {
    return(as.character(x))
  }
  if (is.integer(x)) {
    return(x)
  }

  x <- as.list(unlist(x))

  x <- lapply(x, as.numeric)

  xf <- list()

  # Check for non-zero decimals
  # decs <- sum(unlist(x) %% 1, na.rm = TRUE) > 0

  for (i in seq(x)) {
    if (is.na(x[[i]])) {
      xf[[i]] <- NA
    } else {
      # if (decs & x[[i]] == 0) { # x[[i]] is zero but others have decimals
      if (x[[i]] == 0) {
        # always give requested decimal places
        xf[[i]] <- format(0, nsmall = decimal_places)
      } else {
        if (abs(x[[i]]) >= hi) {
          xf[[i]] <- format(
            round(x[[i]], decimal_places),
            scientific = TRUE,
            digits = decimal_places,
            nsmall = decimal_places
          )
        } else {
          # if (decs) {
          #   xf[[i]] <- ifelse(round(x[[i]], 2) != 0,
          #     format(round(x[[i]], decimal_places), nsmall = decimal_places),
          #     format(x[[i]], scientific = TRUE, digits = 2)
          #   )
          # } else {
          #   xf[[i]] <- as.character(x[[i]])
          # }
          xf[[i]] <- ifelse(
            round(x[[i]], 2) != 0,
            format(round(x[[i]], decimal_places), nsmall = decimal_places),
            format(x[[i]], scientific = TRUE, digits = 2)
          )
        }
      }
    }
  }
  xf <- as.character(xf)
  if (as_numeric) {
    xf <- as.numeric(xf)
  }
  xf
} # /rtemis::ddSci


================================================
FILE: R/ddb.R
================================================
# ddb.R
# ::rtemis::
# 2022- EDG rtemis.org

#' Read CSV using DuckDB
#'
#' Lazy-read a CSV file, optionally: filter rows, remove duplicates,
#' clean column names, convert character to factor, collect.
#'
#' @param filename Character: file name; either full path or just the file name,
#' if `datadir` is also provided.
#' @param datadir Character: Optional path if `filename` is not full path.
#' @param sep Character: Field delimiter/separator.
#' @param header Logical: If TRUE, first line will be read as column names.
#' @param quotechar Character: Quote character.
#' @param ignore_errors Logical: If TRUE, ignore parsing errors (sometimes it's
#' either this or no data, so).
#' @param make_unique Logical: If TRUE, keep only unique rows.
#' @param select_columns Character vector: Column names to select.
#' @param filter_column Character: Name of column to filter on, e.g. "ID".
#' @param filter_vals Numeric or Character vector: Values in `filter_column` to keep.
#' `filter_column` to keep.
#' @param character2factor Logical: If TRUE, convert character columns to
#' factors.
#' @param collect Logical: If TRUE, collect data and return structure class
#' as defined by `returnobj`.
#' @param progress Logical: If TRUE, print progress (no indication this works).
#' @param returnobj Character: "data.frame" or "data.table" object class to
#' return. If "data.table", data.frame object returned from
#' `DBI::dbGetQuery` is passed to `data.table::setDT`; will add to
#' execution time if very large, but then that's when you need a data.table.
#' @param data.table.key Character: If set, this corresponds to a column name in the
#' dataset. This column will be set as key in the data.table output.
#' @param clean_colnames Logical: If TRUE, clean colnames with
#' [clean_colnames].
#' @param verbosity Integer: Verbosity level.
#'
#' @return data.frame or data.table if `collect` is TRUE, otherwise a character with the SQL query
#'
#' @author EDG
#' @export
#'
#' @examples
#' \dontrun{
#'   # Requires local CSV file; replace with your own path
#'   ir <- ddb_data("/Data/massive_dataset.csv",
#'     filter_column = "ID",
#'     filter_vals = 8001:9999
#'   )
#' }
ddb_data <- function(
  filename,
  datadir = NULL,
  sep = ",",
  header = TRUE,
  quotechar = "",
  ignore_errors = TRUE,
  make_unique = TRUE,
  select_columns = NULL,
  filter_column = NULL,
  filter_vals = NULL,
  character2factor = FALSE,
  collect = TRUE,
  progress = TRUE,
  returnobj = c("data.table", "data.frame"),
  data.table.key = NULL,
  clean_colnames = TRUE,
  verbosity = 1L
) {
  # Intro ----
  check_dependencies("DBI", "duckdb")
  returnobj <- match.arg(returnobj)
  if (!is.null(data.table.key)) {
    returnobj <- "data.table"
  }
  path <- if (is.null(datadir)) {
    normalizePath(filename)
  } else {
    file.path(normalizePath(datadir), filename)
  }
  check_files(path, verbosity = 0L)
  fileext <- tools::file_ext(path)

  out <- paste(
    bold(highlight("\u25B6")),
    ifelse(collect, "Reading", "Lazy-reading"),
    highlight(basename(path))
  )
  if (!is.null(filter_column)) {
    out <- paste(
      out,
      bold(highlight("\u29e8")),
      "filtering on",
      bold(filter_column)
    )
  }
  start_time <- intro(out, verbosity = verbosity)
  distinct <- ifelse(make_unique, "DISTINCT ", NULL)
  select <- if (!is.null(select_columns)) {
    ls2sel(select_columns)
  } else {
    "*"
  }

  # SQL ----
  sql <- if (fileext == "parquet") {
    paste0(
      "SELECT ",
      paste0(distinct, select),
      " FROM read_parquet('",
      path,
      "')"
    )
  } else {
    paste0(
      "SELECT ",
      paste0(distinct, select),
      " FROM read_csv_auto('",
      path,
      "',
            sep='",
      sep,
      "', quote='",
      quotechar,
      "',
            header=",
      header,
      ", ignore_errors=",
      ignore_errors,
      ")"
    )
  }

  sql <- if (!is.null(filter_column)) {
    vals <- if (is.numeric(filter_vals)) {
      paste0(filter_vals, collapse = ", ")
    } else {
      paste0("'", paste0(filter_vals, collapse = "', '"), "'")
    }
    paste(
      sql,
      "WHERE",
      filter_column,
      "in (",
      vals,
      ");"
    )
  } else {
    paste0(sql, ";")
  }

  # Collect ----
  if (collect) {
    conn <- DBI::dbConnect(duckdb::duckdb())
    on.exit(DBI::dbDisconnect(conn, shutdown = TRUE))
    # on.exit(
    #     tryCatch(DBI::dbRollback(conn), error = function(e) {
    # }))
    if (progress) {
      DBI::dbExecute(conn, "PRAGMA enable_progress_bar;")
    }
    out <- DBI::dbGetQuery(conn, sql)
    if (clean_colnames) {
      names(out) <- clean_colnames(out)
    }
    if (returnobj == "data.table") {
      data.table::setDT(out)
      if (!is.null(data.table.key)) {
        data.table::setkeyv(out, data.table.key)
      }
    }
    if (character2factor) {
      out <- preprocess(out, setup_Preprocessor(character2factor = TRUE))
    }
  } else {
    out <- sql
  }

  # Outro ----
  outro(start_time, verbosity = verbosity)
  out
} # /rtemis::ddb_data


# output: '"alpha", "beta", "gamma"'
ls2sel <- function(x) {
  paste0(
    '"',
    paste0(x, collapse = '", "'),
    '"'
  )
}


#' Collect a lazy-read duckdb table
#'
#' Collect a table read with `ddb_data(x, collect = FALSE)`
#'
#' @param sql Character: DuckDB SQL query, usually output of
#' [ddb_data] with `collect = FALSE`
#' @param progress Logical: If TRUE, show progress bar
#' @param returnobj Character: data.frame or data.table: class of object to return
#'
#' @return `data.frame` or `data.table`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' \dontrun{
#'   # Requires local CSV file; replace with your own path
#'   sql <- ddb_data("/Data/iris.csv", collect = FALSE)
#'   ir <- ddb_collect(sql)
#' }
ddb_collect <- function(
  sql,
  progress = TRUE,
  returnobj = c("data.frame", "data.table")
) {
  returnobj <- match.arg(returnobj)
  conn <- DBI::dbConnect(duckdb::duckdb())
  on.exit(DBI::dbDisconnect(conn, shutdown = TRUE))
  if (progress) {
    DBI::dbExecute(conn, "PRAGMA enable_progress_bar;")
  }
  out <- DBI::dbGetQuery(conn, sql)
  if (returnobj == "data.table") {
    setDT(out)
  }
  out
} # /rtemis::ddb_collect


================================================
FILE: R/decomp.R
================================================
# decomp.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp ----
#' Perform Data Decomposition
#'
#' Perform linear or non-linear decomposition of numeric data.
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' @param x Matrix or data frame: Input data.
#' @param algorithm Character: Decomposition algorithm.
#' @param config DecompositionConfig: Algorithm-specific config.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `Decomposition` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' iris_pca <- decomp(exc(iris, "Species"), algorithm = "PCA")
decomp <- function(x, algorithm = "ICA", config = NULL, verbosity = 1L) {
  # Checks ----
  if (is.null(config)) {
    config <- get_default_decomparams(algorithm)
  }
  check_is_S7(config, DecompositionConfig)

  # Intro ----
  start_time <- intro(verbosity = verbosity)

  # Data ----
  if (verbosity > 0L) {
    summarize_unsupervised(x)
  }

  # Decompose ----
  algorithm <- get_decom_name(algorithm)
  if (verbosity > 0L) {
    msg0("Decomposing with ", algorithm, "...")
  }

  # decomp_ -> list with elements 'decom' and 'transformed'
  decom <- decomp_(config = config, x = x, verbosity = verbosity)

  # Outro ----
  outro(start_time, verbosity = verbosity)
  Decomposition(
    algorithm = algorithm,
    config = config,
    decom = decom[["decom"]],
    transformed = decom[["transformed"]]
  )
} # /rtemis::decomp


================================================
FILE: R/decomp_ICA.R
================================================
# decom_ICA.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.ICAConfig ----
#' ICA Decomposition
#'
#' @keywords internal
#' @noRd
method(decomp_, ICAConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_dependencies("fastICA")
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  decom <- fastICA::fastICA(
    X = as.matrix(x),
    n.comp = config[["k"]],
    alg.typ = config[["type"]],
    fun = config[["fun"]],
    alpha = config[["alpha"]],
    method = "C",
    row.norm = config[["row_norm"]],
    maxit = config[["maxit"]],
    tol = config[["tol"]],
    verbose = verbosity > 0L
  )
  check_inherits(decom, "list")
  list(decom = decom, transformed = decom[["S"]])
} # /rtemis::decomp_.ICAConfig


================================================
FILE: R/decomp_Isomap.R
================================================
# decom_Isomap.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.IsomapConfig ----
#' Isomap Decomposition
#'
#' @keywords internal
#' @noRd
method(decomp_, IsomapConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_dependencies("vegan")
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  dst <- vegan::vegdist(x = x, method = config[["dist_method"]])
  decom <- vegan::isomap(
    dist = dst,
    ndim = config[["k"]],
    k = config[["nsd"]],
    path = config[["path"]]
  )
  check_inherits(decom, "isomap")
  list(decom = decom, transformed = decom[["points"]])
} # /rtemis::decomp_.IsomapConfig


================================================
FILE: R/decomp_NMF.R
================================================
# decom_NMF.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.NMFConfig ----
#' Non-negative Matrix Factorization (NMF)
#'
#' Decomposes a data matrix into non-negative factors using NMF.
#'
#' @param x A numeric matrix or data frame to be decomposed.
#' @param config `NMFConfig` object.
#' @param verbosity Integer: Verbosity level.
#'
#' @return A list containing the decomposition and transformed data.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(decomp_, NMFConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, NMFConfig)
  check_dependencies("NMF")
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  xm <- as.matrix(x)
  args <- list(x = t(xm), rank = config[["k"]], nrun = config[["nrun"]])
  decom <- do_call(NMF::nmf, args)
  check_inherits(decom, "NMFfit")
  basis <- NMF::basis(decom)
  transformed <- xm %*% basis
  colnames(transformed) <- paste0("NMF_", seq_len(NCOL(transformed)))
  list(decom = decom, transformed = transformed)
} # /rtemis::decomp_.NMFConfig


================================================
FILE: R/decomp_PCA.R
================================================
# decom_PCA.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.PCAConfig ----
#' PCA Decomposition
#'
#' @keywords internal
#' @noRd
method(decomp_, PCAConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, PCAConfig)
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  decom <- prcomp(
    x = x,
    center = config[["center"]],
    scale. = config[["scale"]],
    tol = config[["tol"]],
    rank. = config[["k"]]
  )
  check_inherits(decom, "prcomp")
  list(decom = decom, transformed = decom[["x"]])
} # /rtemis::decomp_.PCAConfig


================================================
FILE: R/decomp_UMAP.R
================================================
# decom_UMAP.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.UMAPConfig ----
#' UMAP Decomposition
#'
#' @param x A numeric matrix or data frame to be decomposed.
#' @param config `UMAPConfig` object.
#' @param verbosity Integer: Verbosity level.
#'
#' @return A list containing the decomposition and transformed data.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(decomp_, UMAPConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, UMAPConfig)
  check_dependencies("uwot")
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  args <- c(
    list(X = x, n_components = config[["k"]], ret_model = TRUE),
    config@config
  )
  args[["k"]] <- NULL
  decom <- do_call(
    uwot::umap,
    args,
    error_pattern_suggestion = list(
      "as_cholmod_sparse" = "Try installing packages 'Matrix' and 'irlba' from source."
    )
  )
  # ret_model = TRUE returns list
  check_inherits(decom, "list")
  list(decom = decom, transformed = decom[["embedding"]])
} # /rtemis::decomp_.UMAPConfig


================================================
FILE: R/decomp_tSNE.R
================================================
# decom_tSNE.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% decomp_.tSNEConfig ----
#' tSNE Decomposition
#'
#' @keywords internal
#' @noRd
method(decomp_, tSNEConfig) <- function(config, x, verbosity = 1L) {
  # Checks ----
  check_is_S7(config, tSNEConfig)
  check_dependencies("Rtsne")
  check_unsupervised_data(x = x, allow_missing = FALSE)

  # Decompose ----
  if (verbosity > 0L) {
    msg("Decomposing with", config@algorithm, "...")
  }
  args <- c(list(X = x, dims = config[["k"]]), config@config)
  args[["k"]] <- NULL
  decom <- do_call(
    Rtsne::Rtsne,
    args,
    error_pattern_suggestion = list(
      "Remove duplicates" = "Remove duplicates using `preprocess()"
    )
  )
  check_inherits(decom, "Rtsne")
  list(decom = decom, transformed = decom[["Y"]])
} # /rtemis::decomp_.tSNEConfig


================================================
FILE: R/draw_3Dscatter.R
================================================
# draw_3Dscatter.R
# ::rtemis::
# 2019- EDG rtemis.org

#' Interactive 3D Scatter Plots
#'
#' Draw interactive 3D scatter plots using `plotly`.
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' Note that `draw_3Dscatter` uses the theme's `plot_bg` as `grid_col`.
#'
#' @param x Numeric, vector/data.frame/list: x-axis data.
#' @param y Numeric, vector/data.frame/list: y-axis data.
#' @param z Numeric, vector/data.frame/list: z-axis data.
#' @param fit Character: Fit method.
#' @param cluster Character: Clustering method.
#' @param cluster_config List: Config for clustering.
#' @param group Factor: Grouping variable.
#' @param formula Formula: Formula for non-linear least squares fit.
#' @param rsq Logical: If TRUE, print R-squared values in legend if `fit` is set.
#' @param mode Character, vector: "markers", "lines", "markers+lines".
#' @param order_on_x Logical: If TRUE, order `x` and `y` on `x`.
#' @param main Character: Main title.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param zlab Character: z-axis label.
#' @param alpha Numeric: Alpha for markers.
#' @param bg Background color.
#' @param plot_bg Plot background color.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param axes_square Logical: If TRUE, draw a square plot.
#' @param group_names Character: Names for groups.
#' @param font_size Numeric: Font size.
#' @param marker_col Color for markers.
#' @param marker_size Numeric: Marker size.
#' @param fit_col Color for fit line.
#' @param fit_alpha Numeric: Alpha for fit line.
#' @param fit_lwd Numeric: Line width for fit line.
#' @param tick_font_size Numeric: Tick font size.
#' @param spike_col Spike lines color.
#' @param legend Logical: If TRUE, draw legend.
#' @param legend_xy Numeric: Position of legend.
#' @param legend_xanchor Character: X anchor for legend.
#' @param legend_yanchor Character: Y anchor for legend.
#' @param legend_orientation Character: Orientation of legend.
#' @param legend_col Color for legend text.
#' @param legend_bg Color for legend background.
#' @param legend_border_col Color for legend border.
#' @param legend_borderwidth Numeric: Border width for legend.
#' @param legend_group_gap Numeric: Gap between legend groups.
#' @param margin Numeric, named list: Margins for top, bottom, left, right.
#' @param fit_params `Hyperparameters` for fit.
#' @param width Numeric: Width of plot.
#' @param height Numeric: Height of plot.
#' @param padding Numeric: Graph padding.
#' @param displayModeBar Logical: If TRUE, display mode bar.
#' @param modeBar_file_format Character: File format for mode bar.
#' @param verbosity Integer: Verbosity level.
#' @param filename Character: Filename to save plot.
#' @param file_width Numeric: Width of saved file.
#' @param file_height Numeric: Height of saved file.
#' @param file_scale Numeric: Scale of saved file.
#'
#' @return A `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' draw_3Dscatter(iris, group = iris$Species, theme = theme_darkgraygrid())
draw_3Dscatter <- function(
  x,
  y = NULL,
  z = NULL,
  fit = NULL,
  cluster = NULL,
  cluster_config = NULL,
  group = NULL,
  formula = NULL,
  rsq = TRUE,
  mode = "markers",
  order_on_x = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  zlab = NULL,
  alpha = .8,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  marker_col = NULL,
  marker_size = 8,
  fit_col = NULL,
  fit_alpha = .7,
  fit_lwd = 2.5,
  tick_font_size = 12,
  spike_col = NULL,
  legend = NULL,
  legend_xy = c(0, 1),
  legend_xanchor = "left",
  legend_yanchor = "auto",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  margin = list(t = 30, b = 0, l = 0, r = 0),
  fit_params = NULL,
  width = NULL,
  height = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  verbosity = 0L,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  if (is.null(y) && is.null(z) && NCOL(x) > 2) {
    .colnames <- labelify(colnames(x))
    y <- x[, 2]
    z <- x[, 3]
    x <- x[, 1]
    if (is.null(xlab)) {
      xlab <- .colnames[1]
    }
    if (is.null(ylab)) {
      ylab <- .colnames[2]
    }
    if (is.null(zlab)) zlab <- .colnames[3]
  }
  if (!is.null(main)) {
    main <- paste0("<b>", main, "</b>")
  }
  if (!is.null(fit)) {
    if (fit == "none") fit <- NULL
  } # easier to work with shiny
  if (!is.null(fit)) {
    fit <- toupper(fit)
  }
  .mode <- mode
  .names <- group_names

  # order_on_x ----
  if (is.null(order_on_x)) {
    order_on_x <- if (!is.null(fit) || any(grepl("lines", mode))) {
      TRUE
    } else {
      FALSE
    }
  }

  # Cluster ----
  if (!is.null(cluster)) {
    group <- suppressWarnings(
      cluster(
        x = data.frame(x, y),
        algorithm = cluster,
        config = do_call(
          get_clust_setup_fn(cluster),
          cluster_config
        )
      )@clusters
    )
    group <- paste("Cluster", group)
  }

  # Data ----
  # xlab, ylab ----
  # The gsubs remove all text up to and including a "$" symbol if present
  if (is.null(xlab)) {
    if (is.list(x)) {
      xlab <- "x"
    } else {
      xlab <- labelify(gsub(".*\\$", "", deparse(substitute(x))))
    }
  }
  if (!is.null(y) && is.null(ylab)) {
    if (is.list(y)) {
      ylab <- "y"
    } else {
      ylab <- labelify(gsub(".*\\$", "", deparse(substitute(y))))
    }
  }
  if (!is.null(z) && is.null(zlab)) {
    if (is.list(z)) {
      zlab <- "z"
    } else {
      zlab <- labelify(gsub(".*\\$", "", deparse(substitute(z))))
    }
  }

  # '- Group ----
  if (!is.null(group)) {
    group <- as.factor(group)
    x <- split(x, group, drop = TRUE)
    y <- split(y, group, drop = TRUE)
    z <- split(z, group, drop = TRUE)
    if (is.null(group_names)) {
      group_names <- levels(droplevels(group))
    }
    names(x) <- names(y) <- names(z) <- .names <- group_names
  }

  # Try to get names from list or data frame inputs
  if (is.list(y) || NCOL(y) > 1) {
    if (is.null(.names) && !is.null(names(y))) .names <- names(y)
  }
  if (is.list(x) || NCOL(x) > 1) {
    if (is.null(.names) && !is.null(names(x))) .names <- names(x)
  }
  if (is.list(z) || NCOL(z) > 1) {
    if (is.null(.names) && !is.null(names(z))) .names <- names(z)
  }

  # Convert to lists ----
  x <- if (!is.list(x)) as.list(as.data.frame(x)) else x
  y <- if (!is.null(y) && !is.list(y)) as.list(as.data.frame(y)) else y
  z <- if (!is.null(z) && !is.list(z)) as.list(as.data.frame(z)) else z
  if (length(x) == 1 && length(y) > 1) {
    x <- rep(x, length(y))
    .names <- names(y)
  }
  if (length(y) == 1 && length(x) > 1) {
    y <- rep(y, length(x))
    .names <- names(x)
  }
  if (length(z) == 1 && length(x) > 1) {
    z <- rep(z, length(x))
    .names <- names(x)
  }
  n_groups <- length(x)

  # legend <- if (is.null(legend) & n_groups == 1 & is.null(fit)) FALSE else TRUE
  legend <- if (is.null(legend) && n_groups == 1) FALSE else TRUE

  if (length(.mode) < n_groups) {
    .mode <- c(.mode, rep(tail(.mode)[1], n_groups - length(.mode)))
  }

  # if (is.null(legend)) legend <- n_groups > 1
  if (is.null(.names)) {
    if (n_groups > 1) {
      .names <- paste("Group", seq_len(n_groups))
    } else {
      .names <- if (!is.null(fit)) fit else NULL
      .names <- NULL
    }
  }

  # Reorder ----
  if (order_on_x) {
    index <- lapply(x, order)
    x <- lapply(seq(x), function(i) x[[i]][index[[i]]])
    y <- lapply(seq(x), function(i) y[[i]][index[[i]]])
    z <- lapply(seq(x), function(i) z[[i]][index[[i]]])
  }

  # s.e. fit ----
  se_fit <- FALSE
  # if (se_fit) {
  #   if (!fit %in% c("GLM", "LM", "LOESS", "GAM", "NW")) {
  #     warning(paste("Standard error of the fit not available for", fit, "- try LM, LOESS, GAM, or NW"))
  #     se_fit <- FALSE
  #   }
  # }

  # Colors ----
  col <- recycle(palette, seq_len(n_groups))

  # Convert inputs to RGB
  spike_col <- plotly::toRGB(spike_col)

  # Theme ----
  axes_visible <- FALSE
  axes_mirrored <- FALSE
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])
  if (!theme[["axes_visible"]]) {
    tick_col <- labs_col <- "transparent"
  }

  # marker_col, se_col ----
  if (is.null(marker_col)) {
    marker_col <- if (!is.null(fit) && n_groups == 1) {
      as.list(rep(theme[["fg"]], n_groups))
    } else {
      col
    }
  }

  if (!is.null(fit)) {
    if (is.null(fit_col)) fit_col <- col
  }

  # Derived
  if (is.null(legend_col)) {
    legend_col <- labs_col
  }

  # Size ----
  if (axes_square) {
    width <- height <- min(dev.size("px")) - 10
  }

  # fitted & se_fit ----
  # If plotting se bands, need to include (fitted +/- se.times * se) in the axis limits
  if (se_fit) {
    se <- list()
  } else {
    se <- NULL
  }
  if (!is.null(fit)) {
    # learner <- get_train_fn(fit)
    fitted <- list()
    fitted_text <- character()
    for (i in seq_len(n_groups)) {
      df1 <- data.frame(x[[i]], y[[i]], z[[i]])
      mod <- train(
        df1,
        algorithm = fit,
        hyperparameters = fit_params,
        verbosity = verbosity
      )
      fitted[[i]] <- fitted(mod)
      if (se_fit) {
        se[[i]] <- se(mod)
      }
      fitted_text[i] <- fit
      if (rsq) {
        fitted_text[i] <- paste0(
          fitted_text[i],
          if (n_groups == 1) " (" else " ",
          "R<sup>2</sup> = ",
          ddSci(mod@metrics_training[["Rsq"]]),
          if (n_groups == 1) ")"
        )
      }
    }
  }

  # plotly ----
  plt <- plotly::plot_ly(
    width = width,
    height = height
  )
  for (i in seq_len(n_groups)) {
    # '- { Scatter } ----
    marker <- if (grepl("markers", .mode[i])) {
      list(
        color = plotly::toRGB(marker_col[[i]], alpha = alpha),
        size = marker_size
      )
    } else {
      NULL
    }
    plt <- plotly::add_trace(
      plt,
      x = x[[i]],
      y = y[[i]],
      z = z[[i]],
      type = "scatter3d",
      mode = .mode[i],
      # fillcolor = plotly::toRGB(col[[i]], alpha),
      # name = if (n_groups > 1) .names[i] else "Raw",
      name = .names[i],
      # text = .text[[i]],
      # hoverinfo = "text",
      # marker = if (grepl("markers", .mode[i])) list(color = plotly::toRGB(marker_col[[i]], alpha = alpha)) else NULL,
      marker = marker,
      line = if (grepl("lines", .mode[i])) {
        list(color = plotly::toRGB(marker_col[[i]], alpha = alpha))
      } else {
        NULL
      },
      legendgroup = if (n_groups > 1) .names[i] else "Raw",
      showlegend = legend
    )
    # if (se_fit) {
    #   # '- { SE band } ----
    #   plt <- plotly::add_trace(plt,
    #                            x = x[[i]],
    #                            y = fitted[[i]] + se.times * se[[i]],
    #                            type = "scatter",
    #                            mode = "lines",
    #                            line = list(color = "transparent"),
    #                            legendgroup = .names[i],
    #                            showlegend = FALSE,
    #                            hoverinfo = "none",
    #                            inherit = FALSE)
    #   plt <- plotly::add_trace(plt, x = x[[i]],
    #                            y = fitted[[i]] - se.times * se[[i]],
    #                            type = "scatter",
    #                            mode = "lines",
    #                            fill = "tonexty",
    #                            fillcolor = plotly::toRGB(se_col[[i]], alpha = se.alpha),
    #                            line = list(color = "transparent"),
    #                            # name = shade.name,
    #                            legendgroup = .names[i],
    #                            showlegend = FALSE,
    #                            hoverinfo = "none",
    #                            inherit = FALSE)
    # }

    if (!is.null(fit)) {
      # '- { Fitted mesh } ----
      plt <- plotly::add_trace(
        plt,
        x = x[[i]],
        y = y[[i]],
        z = fitted[[i]],
        type = "mesh3d",
        opacity = fit_alpha,
        name = fitted_text[i],
        # legendgroup = .names[i],
        # showlegend = if (legend & n_groups == 1) TRUE else FALSE,
        inherit = FALSE,
        showscale = FALSE,
        intensity = 1,
        colorscale = list(
          c(0, plotly::toRGB(fit_col[[i]])),
          c(1, plotly::toRGB(fit_col[[i]]))
        )
      )
    }
  }
  # Layout ----
  # '- layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = tick_font_size,
    color = theme[["tick_labels_col"]]
  )
  .legend <- list(
    x = legend_xy[1],
    xanchor = legend_xanchor,
    y = legend_xy[2],
    yanchor = legend_yanchor,
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    orientation = legend_orientation,
    bgcolor = plotly::toRGB(legend_bg),
    bordercolor = plotly::toRGB(legend_border_col),
    borderwidth = legend_borderwidth,
    tracegroupgap = legend_group_gap
  )

  plt <- plotly::layout(
    plt,
    scene = list(
      yaxis = list(
        title = ylab,
        showline = axes_visible,
        mirror = axes_mirrored,
        titlefont = f,
        showgrid = theme[["grid"]],
        gridcolor = grid_col,
        gridwidth = theme[["grid_lwd"]],
        tickcolor = tick_col,
        tickfont = tickfont,
        zeroline = FALSE,
        spikecolor = spike_col
      ),
      xaxis = list(
        title = xlab,
        showline = axes_visible,
        mirror = axes_mirrored,
        titlefont = f,
        showgrid = theme[["grid"]],
        gridcolor = grid_col,
        gridwidth = theme[["grid_lwd"]],
        tickcolor = tick_col,
        tickfont = tickfont,
        zeroline = FALSE,
        spikecolor = spike_col
      ),
      zaxis = list(
        title = zlab,
        showline = axes_visible,
        mirror = axes_mirrored,
        titlefont = f,
        showgrid = theme[["grid"]],
        gridcolor = grid_col,
        gridwidth = theme[["grid_lwd"]],
        tickcolor = tick_col,
        tickfont = tickfont,
        zeroline = FALSE,
        spikecolor = spike_col
      )
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      )
    ),
    # titlefont = list(),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    showlegend = legend,
    legend = .legend
  )

  # Padding
  plt[["sizingPolicy"]][["padding"]] <- padding
  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    plotly::save_image(
      plt,
      file = file.path(filename),
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  }

  plt
} # /rtemis::draw_3Dscatter


================================================
FILE: R/draw_bar.R
================================================
# draw_bar.R
# ::rtemis::
# 2019-22 EDG rtemis.org

#' Interactive Barplots
#'
#' Draw interactive barplots using `plotly`
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' @param x vector (possibly named), matrix, or data.frame: If matrix or
#' data.frame, rows are groups (can be 1 row), columns are features
#' @param main Character: Main plot title.
#' @param xlab Character: x-axis label.
#' @param ylab  Character: y-axis label.
#' @param alpha Float (0, 1]: Transparency for bar colors.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param barmode Character: Type of bar plot to make: "group", "relative",
#' "stack", "overlay". Default = "group". Use
#' "relative" for stacked bars, wich handles negative values correctly,
#' unlike "stack", as of writing.
#' @param group_names Character, vector, length = NROW(x): Group names.
#' Default = NULL, which uses `rownames(x)`
#' @param order_by_val Logical: If TRUE, order bars by increasing value.
#' Only use for single group data.
#' @param ylim Float, vector, length 2: y-axis limits.
#' @param hovernames Character, vector: Optional character vector to show on
#' hover over each bar.
#' @param feature_names Character, vector, length = NCOL(x): Feature names.
#' Default = NULL, which uses `colnames(x)`
#' @param font_size  Float: Font size for all labels.
#' @param legend Logical: If TRUE, draw legend. Default = NULL, and will be
#' turned on if there is more than one feature present
#' @param legend_col Color: Legend text color. Default = NULL, determined by
#' theme
#' @param hline Float: If defined, draw a horizontal line at this y value.
#' @param hline_col Color for `hline`.
#' @param hline_width Float: Width for `hline`.
#' @param hline_dash Character: Type of line to draw: "solid", "dot", "dash",
#' "longdash", "dashdot",
#' or "longdashdot"
#' @param hline_annotate Character: Text of horizontal line annotation if
#' `hline` is set
#' @param hline_annotation_x Numeric: x position to place annotation with paper
#' as reference. 0: to the left of the plot area; 1: to the right of the plot area
#' @param margin Named list: plot margins.
#' @param padding Integer: N pixels to pad plot.
#' @param horizontal Logical: If TRUE, plot bars horizontally
#' @param annotate Logical: If TRUE, annotate stacked bars
#' @param annotate_col Color for annotations
#' @param legend_xy Numeric, vector, length 2: x and y for plotly's legend
#' @param legend_orientation "v" or "h" for vertical or horizontal
#' @param legend_xanchor Character: Legend's x anchor: "left", "center",
#' "right", "auto"
#' @param legend_yanchor Character: Legend's y anchor: "top", "middle",
#' "bottom", "auto"
#' @param automargin_x Logical: If TRUE, automatically set x-axis margins
#' @param automargin_y Logical: If TRUE, automatically set y-axis margins
#' @param displayModeBar Logical: If TRUE, show plotly's modebar
#' @param modeBar_file_format Character: "svg", "png", "jpeg", "pdf" / any
#' output file type supported by plotly and your system
# @param print_plot Logical: If TRUE, print plot, otherwise return it invisibly
#' @param filename Character: Path to file to save static plot.
#' @param file_width Integer: File width in pixels for when `filename` is
#' set.
#' @param file_height Integer: File height in pixels for when `filename`
#' is set.
#' @param file_scale Numeric: If saving to file, scale plot by this number
#' @param verbosity Integer: Verbosity level.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' draw_bar(VADeaths, legend_xy = c(0, 1))
#' draw_bar(VADeaths, legend_xy = c(1, 1), legend_xanchor = "left")
#' # simple individual bars
#' a <- c(4, 7, 2)
#' draw_bar(a)
#' # if input is a data.frame, each row is a group and each column is a feature
#' b <- data.frame(x = c(3, 5, 7), y = c(2, 1, 8), z = c(4, 5, 2))
#' rownames(b) <- c("Jen", "Ben", "Ren")
#' draw_bar(b)
#' # stacked
#' draw_bar(b, barmode = "stack")
draw_bar <- function(
  x,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = 1,
  horizontal = FALSE,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  barmode = c("group", "relative", "stack", "overlay"),
  group_names = NULL,
  order_by_val = FALSE,
  ylim = NULL,
  hovernames = NULL,
  feature_names = NULL,
  font_size = 16,
  annotate = FALSE,
  annotate_col = theme[["labs_col"]],
  legend = NULL,
  legend_col = NULL,
  legend_xy = c(1, 1),
  legend_orientation = "v",
  legend_xanchor = "left",
  legend_yanchor = "auto",
  hline = NULL,
  hline_col = NULL,
  hline_width = 1,
  hline_dash = "solid",
  hline_annotate = NULL,
  hline_annotation_x = 1,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 0L
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  barmode <- match.arg(barmode)
  if (!is.null(main)) {
    main <- paste0("<b>", main, "</b>")
  }

  dat <- as.data.frame(x)
  if (NROW(dat) == 1 && barmode != "stack") {
    dat <- as.data.frame(t(dat))
  }

  # Order by val ----
  if (order_by_val) {
    if (NCOL(dat) > 1) {
      order_ <- order(sapply(dat, mean, na.rm = TRUE))
      dat <- dat[, order_]
    } else {
      order_ <- order(dat[[1]])
      dat <- dat[order_, , drop = FALSE]
    }
    if (!is.null(group_names)) {
      group_names <- group_names[order_]
    }
    if (!is.null(hovernames)) hovernames <- hovernames[order_]
  }

  # Group names ----
  group_names_ <- group_names
  if (is.null(group_names)) {
    if (!is.null(rownames(dat))) group_names_ <- rownames(dat)
  }

  if (verbosity > 0L) {
    msg("group_names_:", group_names_, "\n")
  }

  # Feature names ----
  feature_names_ <- feature_names
  if (is.null(feature_names_)) {
    if (!is.null(colnames(dat))) {
      feature_names_ <- labelify(colnames(dat))
    } else {
      feature_names_ <- paste0("Feature", seq_len(NCOL(dat)))
    }
  }

  if (verbosity > 0L) {
    msg("feature_names_:", feature_names_, "\n")
  }
  if (is.null(legend)) {
    legend <- length(feature_names_) > 1
  }

  # Colors ----
  p <- NCOL(dat)
  col <- recycle(palette, seq(p))[seq(p)]

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  # Derived
  if (is.null(legend_col)) {
    legend_col <- labs_col
  }

  if (!is.null(hovernames)) {
    hovernames <- matrix(hovernames)
    if (NCOL(hovernames) == 1 && p > 1) {
      hovernames <- matrix(rep(hovernames, p), ncol = p)
    }
  }

  # plot_ly ----
  group_names_ <- factor(group_names_, levels = group_names_)
  plt <- plotly::plot_ly(
    x = if (horizontal) dat[[1]] else group_names_,
    y = if (horizontal) group_names_ else dat[[1]],
    type = "bar",
    name = feature_names_[1],
    text = hovernames[, 1],
    marker = list(color = plotly::toRGB(if (p > 1) col[1] else col, alpha)),
    showlegend = legend
  )
  if (p > 1) {
    for (i in seq_len(p)[-1]) {
      plt <- plotly::add_trace(
        plt,
        x = if (horizontal) dat[[i]] else group_names_,
        y = if (horizontal) group_names_ else dat[[i]],
        name = feature_names_[i],
        text = hovernames[, i],
        marker = list(color = plotly::toRGB(col[i], alpha))
      )
    }
  }

  if (annotate) {
    if (barmode != "stack") {
      warning("Set barmode to 'stack' to allow annotation")
    } else {
      if (horizontal) {
        for (i in seq_len(ncol(dat))) {
          plt <- plt |>
            plotly::add_annotations(
              xref = "x",
              yref = "y",
              x = rowSums(dat[, seq_len(i - 1), drop = FALSE]) + dat[, i] / 2,
              y = seq_len(nrow(dat)) - 1,
              text = paste(dat[, i]),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        }
      } else {
        for (i in seq_len(ncol(dat))) {
          plt <- plt |>
            plotly::add_annotations(
              xref = "x",
              yref = "y",
              x = seq_len(nrow(dat)) - 1,
              y = rowSums(dat[, seq_len(i - 1), drop = FALSE]) + dat[, i] / 2,
              text = paste(signif(dat[, i], 2)),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        }
      }
    }
  }

  # Layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["tick_labels_col"]]
  )
  legend_ <- list(
    x = legend_xy[1],
    y = legend_xy[2],
    xanchor = legend_xanchor,
    yanchor = legend_yanchor,
    bgcolor = "#ffffff00",
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    orientation = legend_orientation
  )

  plt <- plotly::layout(
    plt,
    yaxis = list(
      title = ylab,
      # showline = axes_visible,
      # mirror = axes_mirrored,
      range = ylim,
      titlefont = f,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = FALSE,
      automargin = automargin_y
    ),
    xaxis = list(
      title = xlab,
      # showline = axes_visible,
      # mirror = axes_mirrored,
      titlefont = f,
      showgrid = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      automargin = automargin_x
    ),
    barmode = barmode, # group works without actual groups too
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    # showlegend = legend,
    legend = legend_
  )

  # hline ----
  if (!is.null(hline)) {
    if (is.null(hline_col)) {
      hline_col <- theme[["fg"]]
    }
    hline_col <- recycle(hline_col, hline)
    hline_width <- recycle(hline_width, hline)
    hline_dash <- recycle(hline_dash, hline)
    hlinel <- lapply(seq_along(hline), function(i) {
      list(
        type = "line",
        x0 = 0,
        x1 = 1,
        xref = "paper",
        y0 = hline[i],
        y1 = hline[i],
        line = list(
          color = hline_col[i],
          width = hline_width[i],
          dash = hline_dash[i]
        )
      )
    })
    plt <- plotly::layout(plt, shapes = hlinel)

    # Annotate horizontal lines on the right border of the plot
    if (!is.null(hline_annotate)) {
      plt <- plt |>
        plotly::add_annotations(
          xref = "paper",
          yref = "y",
          xanchor = "right",
          yanchor = "bottom",
          x = hline_annotation_x,
          y = hline,
          text = hline_annotate,
          font = list(
            family = theme[["font_family"]],
            size = font_size,
            color = annotate_col
          ),
          showarrow = FALSE
        )
    }
  }

  # Padding
  plt[["sizingPolicy"]][["padding"]] <- padding

  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_bar.R


================================================
FILE: R/draw_box.R
================================================
# draw_box.R
# ::rtemis::
# EDG rtemis.org

#' Interactive Boxplots & Violin plots
#'
#' Draw interactive boxplots or violin plots using \pkg{plotly}
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' For multiple box plots, the recommendation is:
#' - `x=dat[, columnindex]` for multiple variables of a data.frame
#' - `x=list(a=..., b=..., etc.)` for multiple variables of potentially
#' different length
#' - `x=split(var, group)` for one variable with multiple groups: group names
#' appear below boxplots
#' - `x=dat[, columnindex], group = factor` for grouping multiple variables:
#' group names appear in legend
#'
#' If `orientation == "h"`, `xlab` is applied to y-axis and vice versa.
#' Similarly, `x.axist.type` applies to y-axis - this defaults to
#' "category" and would not normally need changing.
#'
#' @param x Vector or List of vectors: Input
#' @param time Date or date-time vector
#' @param time_bin Character: "year", "quarter", "month", or "day". Period to
#' bin by
#' @param type Character: "box" or "violin"
#' @param group Factor to group by
#' @param x_transform Character: "none", "scale", or "minmax" to use raw values,
#' scaled and centered values or min-max normalized to 0-1, respectively.
#' Transform is applied to each variable before grouping, so that groups are
#' comparable
#' @param main Character: Plot title.
#' @param xlab Character: x-axis label.
#' @param ylab  Character: y-axis label.
#' @param alpha Float (0, 1]: Transparency for box colors.
#' @param bg Color: Background color.
#' @param plot_bg Color: Background color for plot area.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param quartilemethod Character: "linear", "exclusive", "inclusive"
#' @param xlim Numeric vector: x-axis limits
#' @param ylim Numeric vector: y-axis limits
#' @param boxpoints Character or FALSE: "all", "suspectedoutliers", "outliers"
#' See <https://plotly.com/r/box-plots/#choosing-the-algorithm-for-computing-quartiles>
#' @param xnames Character, vector, length = NROW(x): x-axis names. Default = NULL, which
#' tries to set names automatically.
#' @param group_lines Logical: If TRUE, add separating lines between groups of
#' boxplots
#' @param group_lines_dash Character: "solid", "dot", "dash", "longdash",
#' "dashdot", or "longdashdot"
#' @param group_lines_col Color for `group_lines`
#' @param group_lines_alpha Numeric: transparency for `group_lines_col`
#' @param order_by_fn Function: If defined, order boxes by increasing value of
#' this function (e.g. median).
#' @param font_size  Float: Font size for all labels.
#' @param ylab_standoff Numeric: Standoff for y-axis label
#' @param legend Logical: If TRUE, draw legend.
#' @param legend_col Color: Legend text color. Default = NULL, determined by
#' the theme.
#' @param legend_xy Float, vector, length 2: Relative x, y position for legend.
#' @param xaxis_type Character: "linear", "log", "date", "category",
#' "multicategory"
#' @param cataxis_tickangle Numeric: Angle for categorical axis tick labels
#' @param margin Named list: plot margins.
#' @param violin_box Logical: If TRUE and type is "violin" show box within
#' violin plot
#' @param orientation Character: "v" or "h" for vertical, horizontal
#' @param annotate_n Logical: If TRUE, annotate with N in each box
#' @param annotate_n_y Numeric: y position for `annotate_n`
#' @param annotate_mean Logical: If TRUE, annotate with mean of each box
#' @param annotate_meansd Logical: If TRUE, annotate with mean (SD) of each box
#' @param annotate_meansd_y Numeric: y position for `annotate_meansd`
#' @param annotate_col Color for annotations
#' @param labelify Logical: If TRUE, [labelify] x names
#' @param legend_orientation "v" or "h" for vertical, horizontal
#' @param legend_xanchor Character: Legend's x anchor: "left", "center",
#' "right", "auto"
#' @param legend_yanchor Character: Legend's y anchor: "top", "middle",
#' "bottom", "auto"
#' @param automargin_x Logical: If TRUE, automatically set x-axis margins
#' @param automargin_y Logical: If TRUE, automatically set y-axis margins
#' @param boxgroupgap Numeric: Sets the gap (in plot fraction) between boxes
#' of the same location coordinate
#' @param hovertext Character vector: Text to show on hover for each data point
#' @param show_n Logical: If TRUE, show N in each box
#' @param pvals Numeric vector: Precomputed p-values. Should correspond to each box.
#' Bypasses `htest` and `htest_compare`. Requires `group` to be set
#' @param htest Character: e.g. "t.test", "wilcox.test" to compare each box to
#' the *first* box. If grouped, compare within each group to the first box.
#' If p-value of test is less than `htest.thresh`, add asterisk above/
#' to the side of each box
#' @param htest_compare Integer: 0: Compare all distributions against the first one;
#' 2: Compare every second box to the one before it. Requires `group` to
#' be set
#' @param htest_y Numeric: y coordinate for `htest` annotation
#' @param htest_annotate Logical: if TRUE, include htest annotation
#' @param htest_annotate_x Numeric: x-axis paper coordinate for htest annotation
#' @param htest_annotate_y Numeric: y-axis paper coordinate for htest annotation
#' @param htest_star_col Color for htest annotation stars
#' @param htest_bracket_col Color for htest annotation brackets
#' @param starbracket_pad Numeric: Padding for htest annotation brackets
#' @param use_plotly_group If TRUE, use plotly's `group` arg to group
#' boxes.
#' @param width Numeric: Force plot size to this width. Default = NULL, i.e. fill
#' available space
#' @param height Numeric: Force plot size to this height. Default = NULL, i.e. fill
#' available space
#' @param displayModeBar Logical: If TRUE, show plotly's modebar
#' @param filename Character: Path to file to save static plot.
#' @param modeBar_file_format Character: "svg", "png", "jpeg", "pdf"
#' @param file_width Integer: File width in pixels for when `filename` is
#' set.
#' @param file_height Integer: File height in pixels for when `filename`
#' is set.
#' @param file_scale Numeric: If saving to file, scale plot by this number
#' @param mathjax Optional Character \{"local", "cdn"\}: Whether to use local or CDN version of
#' MathJax for rendering mathematical annotations.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # A.1 Box plot of 4 variables
#' draw_box(iris[, 1:4])
#' # A.2 Grouped Box plot
#' draw_box(iris[, 1:4], group = iris[["Species"]])
#' draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE)
#' # B. Boxplot binned by time periods
#' # Synthetic data with an instantenous shift in distributions
#' set.seed(2021)
#' dat1 <- data.frame(alpha = rnorm(200, 0), beta = rnorm(200, 2), gamma = rnorm(200, 3))
#' dat2 <- data.frame(alpha = rnorm(200, 5), beta = rnorm(200, 8), gamma = rnorm(200, -3))
#' x <- rbind(dat1, dat2)
#' startDate <- as.Date("2019-12-04")
#' endDate <- as.Date("2021-03-31")
#' time <- seq(startDate, endDate, length.out = 400)
#' draw_box(x[, 1], time, "year", ylab = "alpha")
#' draw_box(x, time, "year", legend.xy = c(0, 1))
#' draw_box(x, time, "quarter", legend.xy = c(0, 1))
#' draw_box(x, time, "month",
#'   legend.orientation = "h",
#'   legend.xy = c(0, 1),
#'   legend.yanchor = "bottom"
#' )
#' # (Note how the boxplots widen when the period includes data from both dat1 and dat2)
draw_box <- function(
  x,
  time = NULL,
  time_bin = c("year", "quarter", "month", "day"),
  type = c("box", "violin"),
  group = NULL,
  x_transform = c("none", "scale", "minmax"),
  main = NULL,
  xlab = "",
  ylab = NULL,
  alpha = .6,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  boxpoints = "outliers",
  quartilemethod = "linear",
  xlim = NULL,
  ylim = NULL,
  violin_box = TRUE,
  orientation = "v",
  annotate_n = FALSE,
  annotate_n_y = 1,
  annotate_mean = FALSE, # forr A.2.b.
  annotate_meansd = FALSE,
  annotate_meansd_y = 1,
  annotate_col = theme[["labs_col"]],
  xnames = NULL,
  group_lines = FALSE,
  group_lines_dash = "dot",
  group_lines_col = NULL,
  group_lines_alpha = .5,
  labelify = TRUE,
  order_by_fn = NULL,
  font_size = 16,
  # Axes
  ylab_standoff = 18,
  legend = NULL,
  legend_col = NULL,
  legend_xy = NULL,
  legend_orientation = "v",
  legend_xanchor = "auto",
  legend_yanchor = "auto",
  xaxis_type = "category",
  cataxis_tickangle = "auto",
  # margin = list(t = 35, pad = 0),
  margin = list(b = 65, l = 65, t = 50, r = 12, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  # boxgap = 0, #1/nvars, #.12,
  boxgroupgap = NULL,
  hovertext = NULL,
  show_n = FALSE,
  # boxmode = NULL,
  pvals = NULL,
  htest = "none",
  htest_compare = 0,
  #    htest.thresh = .05,
  htest_y = NULL,
  htest_annotate = TRUE,
  htest_annotate_x = 0,
  htest_annotate_y = -.065,
  htest_star_col = theme[["labs_col"]],
  htest_bracket_col = theme[["labs_col"]],
  starbracket_pad = c(.04, .05, .09),
  use_plotly_group = FALSE,
  width = NULL,
  height = NULL,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  mathjax = NULL
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  type <- match.arg(type)
  x_transform <- match.arg(x_transform)

  # Convert vector or data.frame/data.table/matrix to list
  if (!is.list(x)) {
    # x is vector
    if (is.numeric(x)) {
      .names <- deparse(substitute(x))
      x <- list(x)
      names(x) <- .names
    } else {
      # x is data.frame or matrix
      .names <- colnames(x)
      x <- lapply(seq_len(NCOL(x)), function(i) x[, i])
      names(x) <- .names
    }
  }
  nvars <- length(x)
  if (nvars > 1 && !is.null(group) && !is.null(time)) {
    cli::cli_abort("Better use subplot for each variable")
  }
  horizontal <- orientation == "h"

  if (x_transform != "none") {
    if (x_transform == "scale") {
      x <- lapply(x, \(z) as.numeric(scale(z)))
    } else if (x_transform == "minmax") {
      x <- lapply(x, drange)
    } else {
      cli::cli_abort("Unsupported x_transform specified")
    }
  }

  # Order by fn ----
  if (!is.null(order_by_fn)) {
    if (is.null(time)) {
      if (is.list(x)) {
        .order <- order(sapply(x, order_by_fn, na.rm = TRUE))
        if (is.data.frame(x)) {
          x <- x[, .order]
        } else {
          x <- x[names(x)[.order]]
        }
      }
      if (!is.null(xnames)) {
        xnames <- xnames[.order]
      }
    } else {
      cli::cli_abort("Cannot use `order_by_fn` with `time`")
    }
  }

  # Remove non-numeric vectors
  # which.nonnum <- which(sapply(x, function(i) !is.numeric(i)))
  # if (length(which.nonnum) > 0) x[[which.nonnum]] <- NULL

  if (!is.null(group)) {
    group <- factor(group)
  }
  n_groups <- if (is.null(group)) {
    length(x)
  } else {
    nlevels(group)
  }
  if (n_groups == 1) {
    htest <- "none"
  }
  .xnames <- xnames
  if (is.null(.xnames)) {
    .xnames <- names(x)
    if (is.null(.xnames)) {
      .xnames <- paste0("Feature", seq(n_groups))
    }
    if (labelify) .xnames <- labelify(.xnames)
  }

  # Colors ----
  col <- recycle(palette, seq(n_groups))

  # Theme ----
  check_is_S7(theme, Theme)

  if (theme[["main_font"]] == 2) {
    main <- paste0("<b>", main, "</b>")
  }
  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  # Derived
  if (is.null(legend_col)) {
    legend_col <- labs_col
  }

  # Plot ----
  if (is.null(time)) {
    if (is.null(group)) {
      # A.1 Single and multiple boxplots ----
      if (is.null(legend)) {
        legend <- FALSE
      }
      # Args for first trace
      .args <- if (horizontal) {
        list(x = x[[1]], y = NULL)
      } else {
        list(x = NULL, y = x[[1]])
      }
      .args <- c(
        .args,
        list(
          type = type,
          # name = .xnames[1],
          name = if (show_n) {
            paste0(.xnames[1], " (N=", length(x[[1]]), ")")
          } else {
            .xnames[1]
          },
          line = list(color = plotly::toRGB(col[1])),
          fillcolor = plotly::toRGB(col[1], alpha),
          marker = list(color = plotly::toRGB(col[1], alpha)),
          showlegend = legend
          # width = width
        )
      )
      if (!is.null(hovertext) && n_groups == 1) {
        hovertext <- list(hovertext)
      }
      if (type == "box") {
        .args <- c(
          .args,
          list(
            quartilemethod = quartilemethod,
            boxpoints = boxpoints
          )
        )
        if (!is.null(hovertext)) .args[["text"]] <- hovertext[[1]]
      }
      if (type == "violin") {
        .args[["box"]] <- list(visible = violin_box)
      }
      plt <- do.call(plotly::plot_ly, .args)
      if (n_groups > 1) {
        for (i in seq_len(n_groups)[-1]) {
          plt <- plotly::add_trace(
            plt,
            x = if (horizontal) x[[i]] else NULL,
            y = if (horizontal) NULL else x[[i]],
            # name = .xnames[i],
            name = if (show_n) {
              paste0(.xnames[i], " (N=", length(x[[i]]), ")")
            } else {
              .xnames[i]
            },
            line = list(color = plotly::toRGB(col[i])), # box borders
            fillcolor = plotly::toRGB(col[i], alpha), # box fill
            marker = list(color = plotly::toRGB(col[i], alpha)), # points
            text = if (!is.null(hovertext)) hovertext[[i]] else NULL
          )
        }
      }

      # '-Annotate N ----
      if (annotate_n) {
        Nperbox <- Filter(
          function(i) i > 0,
          sapply(x, function(j) length(na.exclude(j)))
        )
        plt <- plt |>
          plotly::add_annotations(
            xref = "paper",
            yref = "paper",
            xanchor = "right",
            yanchor = "bottom",
            x = 0,
            y = annotate_n_y,
            text = "N =",
            font = list(
              family = theme[["font_family"]],
              size = font_size,
              color = annotate_col
            ),
            showarrow = FALSE
          ) |>
          plotly::add_annotations(
            xref = "x",
            yref = "paper",
            yanchor = "bottom",
            # x = seq_len(nvars) - 1,
            x = seq_along(Nperbox) - 1,
            y = 1,
            text = as.character(Nperbox),
            font = list(
              family = theme[["font_family"]],
              size = font_size,
              color = annotate_col
            ),
            showarrow = FALSE
          )
      } # /annotate_n

      # '-Annotate Mean SD ----
      if (annotate_meansd) {
        Meanperbox <- sapply(x, function(j) mean(na.exclude(j))) |>
          round(digits = 2) |>
          format(nsmall = 2)
        SDperbox <- sapply(x, function(j) sd(na.exclude(j))) |>
          round(digits = 2) |>
          format(nsmall = 2)
        plt <- plt |>
          plotly::add_annotations(
            xref = "x",
            yref = "paper",
            yanchor = "bottom",
            x = seq_along(Meanperbox) - 1,
            y = 1,
            # text = as.character(Nperbox),
            text = paste0(Meanperbox, " (", SDperbox, ")"),
            font = list(
              family = theme[["font_family"]],
              size = font_size,
              color = annotate_col
            ),
            showarrow = FALSE
          )
      } # /annotate_meansd

      # '-htest ----
      if (htest != "none") {
        if (htest_compare == 0) {
          pvals <- sapply(x[-1], \(v) {
            suppressWarnings(
              do.call(htest, list(x = x[[1]], y = v))[["p.value"]]
            )
          })
        }
        y_sb <- starbracket_y(unlist(x), pad = starbracket_pad)
        if (is.null(htest_y)) {
          htest_y <- y_sb[["star"]]
        }
        plt <- plt |>
          plotly::add_annotations(
            xref = if (horizontal) "paper" else "x",
            # yref = if (horizontal) "x" else "paper",
            yref = if (horizontal) "x" else "y",
            yanchor = if (horizontal) "auto" else "top",
            xanchor = if (horizontal) "center" else "auto",
            x = if (horizontal) htest_y else seq_along(pvals), # exclude first
            y = if (horizontal) seq_along(pvals) else htest_y,
            # text = unname(ifelse(pvals < htest.thresh, "*", "")),
            text = pval_stars(pvals),
            font = list(
              family = theme[["font_family"]],
              size = font_size,
              color = annotate_col
            ),
            showarrow = FALSE
          )

        if (htest_annotate) {
          test <- switch(
            htest,
            `wilcox.test` = "Wilcoxon",
            `t.test` = "T-test",
            htest
          )
          plt <- plt |>
            plotly::add_annotations(
              xref = "paper",
              yref = "paper",
              yanchor = "top",
              xanchor = "left",
              x = htest_annotate_x,
              y = htest_annotate_y,
              # text = paste0("<sup>*</sup>", test, " p-val < ", htest.thresh),
              # text = paste0("* ", test, " p-val < ", htest.thresh),
              # text = paste0(
              #     '<span style="color:',
              #     htest.star.col, '">* </span>',
              #     test, " p-val < ", htest.thresh),
              text = paste0(
                test,
                " p-val:",
                '<span style="color:',
                htest_star_col,
                '"> * </span>',
                "< .05",
                '<span style="color:',
                htest_star_col,
                '"> ** </span>',
                "< .01",
                '<span style="color:',
                htest_star_col,
                '"> *** </span>',
                "< .001"
              ),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        }
      } # / htest!="none"
    } else {
      if (use_plotly_group) {
        # A.2.a Grouped boxplots with [group] ----
        # Best to use this for multiple variables x group.
        # For single variables x group, preferred way it to use
        # split(var, group) => A1
        if (is.null(legend)) {
          legend <- TRUE
        }
        dt <- cbind(data.table::as.data.table(x), group = group)
        dtlong <- data.table::melt(
          dt[, ID := seq_len(nrow(dt))],
          id.vars = c("ID", "group")
        )
        if (is.null(ylab)) {
          ylab <- ""
        }
        .args <- list(
          data = dtlong,
          type = type,
          x = if (horizontal) ~value else ~variable,
          y = if (horizontal) ~variable else ~value,
          color = ~group,
          colors = col2hex(col),
          showlegend = legend
        )
        if (type == "box") {
          .args <- c(
            .args,
            list(
              quartilemethod = quartilemethod,
              boxpoints = boxpoints,
              alpha = alpha
            )
          )
          if (!is.null(hovertext)) {
            dtlong <- merge(dtlong, cbind(dt[, list(ID)], hovertext))
            .args[["text"]] <- dtlong[["hovertext"]]
          }
        }
        if (type == "violin") {
          .args[["box"]] <- list(visible = violin_box)
        }
        cataxis <- list(
          tickvals = 0:(NCOL(dt) - 2),
          ticktext = .xnames
        )
        .args <- c(list(width = width, height = height), .args)
        plt <- do.call(plotly::plot_ly, .args) |>
          plotly::layout(
            boxmode = "group",
            xaxis = if (horizontal) NULL else cataxis,
            yaxis = if (horizontal) cataxis else NULL
          )
      } else {
        # A.2.b Grouped boxplots with split and loop ----
        # Replaces A.2.a to allow annotation positioning
        if (is.null(legend)) {
          legend <- TRUE
        }
        dts <- split(data.table::as.data.table(x), group, drop = TRUE)

        if (is.null(ylab)) {
          ylab <- ""
        }
        if (type == "box") {
          .args <- list(
            type = "box",
            quartilemethod = quartilemethod,
            boxpoints = boxpoints,
            alpha = alpha
          )
        } else {
          .args <- list(
            type = "violin",
            box = list(visible = violin_box)
          )
        }

        varnames <- names(x)
        nvars <- length(varnames)
        ngroups <- length(dts)
        groupnames <- names(dts)
        xval <- do.call(paste, expand.grid(groupnames, varnames))
        # text = xval[i],
        xval <- factor(xval, levels = xval)

        boxindex <- 0

        # plt <- plotly::plot_ly(type = type) # box or violin
        .args <- c(list(width = width, height = height), .args)
        plt <- do.call(plotly::plot_ly, .args)
        for (i in seq_along(varnames)) {
          # loop vars
          for (j in seq_along(dts)) {
            # loop groups
            boxindex <- boxindex + 1
            plt <- plt |>
              plotly::add_trace(
                x = if (horizontal) dts[[j]][[i]] else xval[boxindex],
                y = if (horizontal) xval[boxindex] else dts[[j]][[i]],
                name = groupnames[j],
                meta = xval[boxindex],
                line = list(color = plotly::toRGB(col[j])),
                fillcolor = plotly::toRGB(col[j], alpha),
                marker = list(color = plotly::toRGB(col[j], alpha)),
                showlegend = legend & (i == nvars),
                hoverinfo = "all",
                legendgroup = groupnames[j]
              )
          }
        }

        cataxis <- list(
          type = "category",
          tickmode = "array",
          tickvals = (mean(seq_len(ngroups)) + 0:(nvars - 1) * ngroups) - 1, # need -1 if type = "category"
          ticktext = .xnames,
          tickangle = cataxis_tickangle,
          automargin = TRUE
        )

        plt <- plt |>
          plotly::layout(
            xaxis = if (horizontal) NULL else cataxis,
            yaxis = if (horizontal) cataxis else NULL
          )

        # '- Group lines ----
        if (nvars > 1 && group_lines) {
          if (is.null(group_lines_col)) {
            group_lines_col <- theme[["fg"]]
          }
          group_lines_col <- adjustcolor(
            group_lines_col,
            group_lines_alpha
          )
          at <- seq((ngroups - .5), (ngroups * (nvars - 1) - .5), by = ngroups)
          if (horizontal) {
            plt <- plt |>
              plotly::layout(
                shapes = plotly_hline(
                  at,
                  color = group_lines_col,
                  dash = group_lines_dash
                )
              )
          } else {
            plt <- plt |>
              plotly::layout(
                shapes = plotly_vline(
                  at,
                  color = group_lines_col,
                  dash = group_lines_dash
                )
              )
          }
        }

        # '-Annotate N ----
        if (annotate_n) {
          Nperbox <- Filter(
            function(i) i > 0,
            c(t(sapply(dts, function(i) {
              sapply(i, function(j) length(na.exclude(j)))
            })))
          )
          plt <- plt |>
            plotly::add_annotations(
              xref = "paper",
              yref = "paper",
              xanchor = "right",
              yanchor = "bottom",
              x = 0,
              y = annotate_n_y,
              text = "N =",
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            ) |>
            plotly::add_annotations(
              xref = "x",
              yref = "paper",
              yanchor = "bottom",
              x = seq_len(nvars * ngroups) - 1,
              y = 1,
              text = as.character(Nperbox),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        } # /annotate_n

        # '-Annotate Mean SD ----
        if (annotate_meansd) {
          Meanperbox <- c(t(sapply(dts, function(i) {
            sapply(i, function(j) mean(na.exclude(j)))
          }))) |>
            round(digits = 2) |>
            format(nsmall = 2)
          SDperbox <- c(t(sapply(dts, function(i) {
            sapply(i, function(j) sd(na.exclude(j)))
          }))) |>
            round(digits = 2) |>
            format(nsmall = 2)
          plt <- plt |>
            plotly::add_annotations(
              xref = "x",
              yref = "paper",
              yanchor = "bottom",
              x = seq_len(nvars * ngroups) - 1,
              y = 1,
              text = paste0(Meanperbox, " (", SDperbox, ")"),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        } # /annotate_meansd

        # '-Annotate Mean ----
        if (annotate_mean) {
          Meanperbox <- c(t(sapply(dts, function(i) {
            sapply(i, function(j) mean(na.exclude(j)))
          }))) |>
            round(digits = 1) |>
            format(nsmall = 1)
          plt <- plt |>
            plotly::add_annotations(
              xref = "x",
              yref = "paper",
              yanchor = "bottom",
              x = seq_len(nvars * ngroups) - 1,
              y = 1,
              text = Meanperbox,
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = annotate_col
              ),
              showarrow = FALSE
            )
        } # /annotate_mean

        # '- htest ----
        if (htest != "none" || !is.null(pvals)) {
          # dts list elements are groups; columns are variables
          # pvals is N groups -1 x N vars
          if (is.null(pvals)) {
            if (htest_compare == 0) {
              pvals <- sapply(seq_len(nvars), \(cid) {
                sapply(2:ngroups, \(gid) {
                  suppressWarnings(
                    do.call(
                      htest,
                      list(
                        x = dts[[1]][[cid]],
                        y = dts[[gid]][[cid]]
                      )
                    )[["p.value"]]
                  )
                })
              })
              pvals <- c(rbind(1, pvals))
            } else if (htest_compare == 2) {
              pvals <- rep(1, nvars * ngroups)
              pvals[seq(2, ngroups * nvars, 2)] <- lapply(
                seq_len(nvars),
                \(cid) {
                  lapply(seq(htest_compare, ngroups, htest_compare), \(gid) {
                    suppressWarnings(
                      do.call(
                        htest,
                        list(
                          x = dts[[gid - 1]][[cid]],
                          y = dts[[gid]][[cid]]
                        )
                      )[["p.value"]]
                    )
                  })
                }
              ) |>
                unlist()
            }
          }
          # if brackets are drawn, center stars above them, otherwise
          # center stars above boxes
          axshift <- if (htest_compare == 2) 1.5 else 1
          y_sb <- starbracket_y(unlist(x), pad = starbracket_pad)
          if (is.null(htest_y)) {
            htest_y <- y_sb[["star"]]
          }
          plt <- plt |>
            plotly::add_annotations(
              xref = if (horizontal) "paper" else "x",
              # yref = if (horizontal) "x" else "paper",
              yref = if (horizontal) "x" else "y",
              yanchor = if (horizontal) "auto" else "top",
              xanchor = if (horizontal) "center" else "auto",
              x = if (horizontal) {
                htest_y
              } else {
                seq_len(nvars * ngroups) - axshift
              },
              y = if (horizontal) {
                seq_len(nvars * ngroups) - axshift
              } else {
                htest_y
              },
              # text = unname(ifelse(pvals < htest.thresh, "*", "")),
              text = pval_stars(pvals),
              font = list(
                family = theme[["font_family"]],
                size = font_size,
                color = htest_star_col
              ),
              showarrow = FALSE
            )
          if (htest_annotate) {
            test <- switch(
              htest,
              `wilcox.test` = "Wilcoxon",
              `t.test` = "T-test",
              htest
            )
            plt <- plt |>
              plotly::add_annotations(
                xref = "paper",
                yref = "paper",
                yanchor = "top",
                xanchor = "left",
                x = htest_annotate_x,
                y = htest_annotate_y,
                # text = paste0("<sup>*</sup>", test, " p-val < ", htest.thresh),
                # text = paste0("* ", test, " p-val < ", htest.thresh),
                # text = paste0(
                #     '<span style="color:',
                #     htest.star.col, '">* </span>',
                #     test, " p-val < ", htest.thresh
                # ),
                text = paste0(
                  test,
                  " p-val:",
                  '<span style="color:',
                  htest_star_col,
                  '"> * </span>',
                  "< .05",
                  '<span style="color:',
                  htest_star_col,
                  '"> ** </span>',
                  "< .01",
                  '<span style="color:',
                  htest_star_col,
                  '"> *** </span>',
                  "< .001"
                ),
                font = list(
                  family = theme[["font_family"]],
                  size = font_size,
                  color = annotate_col
                ),
                showarrow = FALSE
              )
          } # /htest.annotate

          # '- htest brackets for htest.compare == 2 ----
          if (htest_compare == 2) {
            for (i in seq(2, ngroups * nvars, 2)) {
              if (pvals[i] < .05) {
                # y_bracket <- bracket_y(unlist(x))
                plt <- plt |>
                  plotly::add_trace(
                    x = c(rep(xval[i - 1], 2), rep(xval[i], 2)),
                    y = y_sb[["bracket"]],
                    type = "scatter",
                    mode = "lines",
                    inherit = FALSE,
                    line = list(color = htest_bracket_col, width = 1),
                    showlegend = FALSE
                  )
              }
            }
          }
        } # /htest grouped
      }
    }
  } else {
    # B. Time-binned boxplots ----
    time_bin <- match.arg(time_bin)
    if (is.null(xlab)) {
      xlab <- ""
    }
    if (is.null(ylab)) {
      ylab <- ""
    }
    if (is.null(legend)) {
      legend <- TRUE
    }

    dt <- data.table::as.data.table(x)
    if (!is.null(group)) {
      dt[, group := group]
    }
    if (!is.null(hovertext)) {
      dt[, hovertext := hovertext]
    }

    dt[, timeperiod := date2factor(time, time_bin)] |>
      setkey(timeperiod)

    Npertimeperiod <- dt[levels(timeperiod)][,
      lapply(.SD, \(i) length(na.exclude(i))),
      by = timeperiod
    ] |>
      setorder()

    ## Long data
    # appease R CMD check
    ID <- timeperiod <- NULL
    dtlong <- data.table::melt(
      dt[, ID := .I],
      id.vars = c(
        "ID",
        "timeperiod",
        mgetnames(dt, "group", "hovertext")
      )
    )

    if (is.null(group)) {
      .args <- list(
        data = dtlong,
        type = type,
        x = if (horizontal) ~value else ~timeperiod,
        y = if (horizontal) ~timeperiod else ~value,
        color = ~variable,
        colors = col2hex(col),
        showlegend = legend
      )
    } else {
      .args <- list(
        data = dtlong,
        type = type,
        x = if (horizontal) ~value else ~timeperiod,
        y = if (horizontal) ~timeperiod else ~value,
        color = ~group,
        colors = col2hex(col),
        showlegend = legend
      )
    }

    if (!is.null(hovertext)) {
      .args[["text"]] <- dtlong[["hovertext"]]
    }

    if (type == "box") {
      .args <- c(
        .args,
        list(
          quartilemethod = quartilemethod,
          boxpoints = boxpoints
        )
      )
    }
    if (type == "violin") {
      .args[["box"]] <- list(visible = violin_box)
    }

    .args <- c(list(width = width, height = height), .args)
    plt <- do.call(plotly::plot_ly, .args)
    if (!is.null(group) || nvars > 1) {
      plt <- plt |> plotly::layout(boxmode = "group")
    }

    # '-Annotate N ----
    if (is.null(group) && annotate_n) {
      Nperbox <- Npertimeperiod[[2]] # include zeros
      plt <- plt |>
        plotly::add_annotations(
          xref = "paper",
          yref = "paper",
          xanchor = "right",
          yanchor = "bottom",
          x = 0,
          y = annotate_n_y,
          text = "N =",
          font = list(
            family = theme[["font_family"]],
            size = font_size,
            color = annotate_col
          ),
          showarrow = FALSE
        ) |>
        plotly::add_annotations(
          xref = "x",
          yref = "paper",
          yanchor = "bottom",
          x = seq_along(Nperbox) - 1,
          y = 1,
          text = paste(Nperbox),
          font = list(
            family = theme[["font_family"]],
            size = font_size,
            color = annotate_col
          ),
          showarrow = FALSE
        )
    }
  } # /time-binned boxplots

  # Layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["tick_labels_col"]]
  )
  .legend <- list(
    x = legend_xy[1],
    y = legend_xy[2],
    xanchor = legend_xanchor,
    yanchor = legend_yanchor,
    bgcolor = "#ffffff00",
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    orientation = legend_orientation
  )

  yaxis_title <- if (horizontal) xlab else ylab
  plt <- plotly::layout(
    plt,
    yaxis = list(
      title = list(text = yaxis_title, standoff = ylab_standoff),
      type = if (horizontal) xaxis_type else NULL,
      titlefont = f,
      showgrid = if (horizontal) FALSE else theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = if (horizontal) NA else tick_col,
      tickfont = tickfont,
      zeroline = FALSE,
      automargin = automargin_y,
      range = ylim
    ),
    xaxis = list(
      title = if (horizontal) ylab else xlab,
      type = if (horizontal) NULL else xaxis_type,
      titlefont = f,
      showgrid = if (horizontal) theme[["grid"]] else FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = if (horizontal) tick_col else NA,
      tickfont = tickfont,
      automargin = automargin_x,
      range = xlim
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    legend = .legend,
    # boxgap = boxgap,
    boxgroupgap = boxgroupgap
  )

  # Config ----
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    ),
    mathjax = mathjax
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_box.R


================================================
FILE: R/draw_calibration.R
================================================
# draw_calibration.R
# ::rtemis::
# 2023 EDG rtemis.org

#' Draw calibration plot
#'
#' @param true_labels Factor or list of factors with true class labels
#' @param predicted_prob Numeric vector or list of numeric vectors with predicted probabilities
#' @param bin_method Character: "quantile" or "equidistant": Method to bin the estimated
#' probabilities.
#' @param n_bins Integer: Number of windows to split the data into
#' @param binclasspos Integer: Index of the positive class. The convention used in the package is
#' the second level is the positive class.
#' @param main Character: Main title
#' @param subtitle Character: Subtitle, placed bottom right of plot
#' @param xlab Character: x-axis label
#' @param ylab Character: y-axis label
#' @param show_marginal_x Logical: Add marginal plot of distribution of estimated probabilities
#' @param marginal_x_y Numeric: y position of marginal plot
#' @param marginal_col Character: Color of marginal plot
#' @param marginal_size Numeric: Size of marginal plot
#' @param mode Character: "lines", "markers", "lines+markers": How to plot.
#' @param show_brier Logical: If TRUE, add Brier scores to trace names.
#' @param theme `Theme` object.
#' @param filename Character: Path to save output.
#' @param ... Additional arguments passed to [draw_scatter]
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Synthetic data with n cases
#' n <- 500L
#' true_labels <- factor(sample(c("A", "B"), n, replace = TRUE))
#' # Synthetic probabilities where A has mean 0.25 and B has mean 0.75
#' predicted_prob <- ifelse(true_labels == "A",
#'   rbeta(n, 2, 6),
#'   rbeta(n, 6, 2)
#' )
#' draw_calibration(true_labels, predicted_prob)
draw_calibration <- function(
  true_labels,
  predicted_prob,
  n_bins = 10L,
  bin_method = c("quantile", "equidistant"),
  binclasspos = 2L,
  main = NULL,
  subtitle = NULL,
  xlab = "Mean predicted probability",
  ylab = "Empirical risk",
  show_marginal_x = TRUE,
  marginal_x_y = -.02,
  marginal_col = NULL,
  marginal_size = 10,
  mode = "markers+lines",
  show_brier = TRUE,
  theme = choose_theme(getOption("rtemis_theme")),
  filename = NULL,
  ...
) {
  # Arguments ----
  bin_method <- match.arg(bin_method)
  if (!is.list(true_labels)) {
    true_labels <- list(true_labels = true_labels)
  }
  if (!is.list(predicted_prob)) {
    predicted_prob <- list(estimated_prob = predicted_prob)
  }
  # Ensure same number of inputs
  stopifnot(length(true_labels) == length(predicted_prob))

  # Theme ----
  check_is_S7(theme, Theme)

  pos_class <- lapply(true_labels, \(x) {
    levels(x)[binclasspos]
  })

  # Ensure same positive class
  stopifnot(length(unique(unlist(pos_class))) == 1)

  # Create windows
  if (bin_method == "equidistant") {
    breaks <- lapply(seq_along(predicted_prob), \(x) {
      seq(0, 1, length.out = n_bins + 1)
    })
  } else if (bin_method == "quantile") {
    breaks <- lapply(predicted_prob, \(x) {
      quantile(x, probs = seq(0, 1, length.out = n_bins + 1))
    })
  }

  # Calculate the mean probability in each window
  mean_bin_prob <- lapply(seq_along(predicted_prob), \(i) {
    sapply(seq_len(n_bins), \(j) {
      mean(predicted_prob[[i]][
        predicted_prob[[i]] >= breaks[[i]][j] &
          predicted_prob[[i]] < breaks[[i]][j + 1]
      ])
    })
  })
  names(mean_bin_prob) <- names(predicted_prob)

  # Calculate the proportion of condition positive cases in each window
  window_empirical_risk <- lapply(seq_along(predicted_prob), \(i) {
    sapply(seq_len(n_bins), \(j) {
      idl <- predicted_prob[[i]] >= breaks[[i]][j] &
        predicted_prob[[i]] < breaks[[i]][j + 1]
      sum(true_labels[[i]][idl] == pos_class[[i]]) / sum(idl)
    })
  })
  names(window_empirical_risk) <- names(predicted_prob)

  # Add Brier score
  if (show_brier) {
    .brier_score <- sapply(seq_along(predicted_prob), \(i) {
      brier_score(
        true_int = labels2int(true_labels[[i]], binclasspos),
        predicted_prob = predicted_prob[[i]]
      )
    })
    names(window_empirical_risk) <- paste0(
      names(window_empirical_risk),
      " (Brier=",
      round(.brier_score, 3),
      ")"
    )
  }

  # Calculate confidence intervals
  # confint <- sapply(seq_len(n_bins), \(i) {
  #     events <- length(true_labels[true_labels == pos_class & predicted_prob >= breaks[i] & predicted_prob < breaks[i + 1]])
  #     total <- length(predicted_prob >= breaks[i] & predicted_prob < breaks[i + 1])
  #     suppressWarnings(pt <- prop.test(
  #         events, total,
  #         conf.level = conf_level
  #     ))
  #     pt$conf.int
  # })

  # Plot
  if (is.null(subtitle)) {
    subtitle <- paste(
      "using",
      n_bins,
      if (bin_method == "quantile") "quantiles" else "equidistant bins"
    )
  }
  # if (is.null(subtitle) && !is.na(subtitle)) .subtitle <- paste0(subtitle, "\n", .subtitle)
  plt <- draw_scatter(
    x = mean_bin_prob,
    y = window_empirical_risk,
    main = main,
    # subtitle = paste("<i>", .subtitle, "</i>"),
    subtitle = subtitle,
    subtitle_x = 1,
    subtitle_y = 0,
    subtitle_yref = "y",
    subtitle_xanchor = "right",
    subtitle_yanchor = "bottom",
    xlab = xlab,
    ylab = ylab,
    show_marginal_x = show_marginal_x,
    marginal_x = predicted_prob,
    marginal_x_y = marginal_x_y,
    marginal_size = marginal_size,
    axes_square = TRUE,
    diagonal = TRUE,
    xlim = c(0, 1),
    ylim = c(0, 1),
    mode = mode,
    theme = theme,
    filename = filename,
    ...
  )

  # Add marginal.x ----
  # Using estimated probabilities
  # if (marginal.x) {
  #   if (is.null(marginal.col)) marginal.col <- plotly::toRGB(theme[["fg"]], alpha = .5)
  #   for (i in seq_along(mean_bin_prob)) {
  #     plt <- plotly::add_trace(
  #       plt,
  #       x = predicted_prob[[i]],
  #       y = rep(-.02, length(predicted_prob[[i]])),
  #       type = "scatter",
  #       mode = "markers",
  #       marker = list(
  #         color = marginal.col,
  #         size = marginal.size,
  #         symbol = "line-ns-open"
  #       ),
  #       showlegend = FALSE,
  #       hoverinfo = "x"
  #     )
  #   }
  # } # /marginal.x

  plt
} # /rtemis::draw_calibration


================================================
FILE: R/draw_confusion.R
================================================
# draw_confusion.R
# ::rtemis::
# 2024- EDG rtemis.org

#' Plot confusion matrix
#'
#' @param x `ClassificationMetrics` object produced by [classification_metrics] or confusion matrix
#' where rows are the reference and columns are the estimated classes. For binary classification,
#' the first row and column are the positive class.
#' @param xlab Character: x-axis label. Default is "Predicted".
#' @param ylab Character: y-axis label. Default is "Reference".
#' @param true_col Color for true positives & true negatives.
#' @param false_col Color for false positives & false negatives.
#' @param font_size Integer: font size.
#' @param main Character: plot title.
#' @param main_y Numeric: y position of the title.
#' @param main_yanchor Character: y anchor of the title.
#' @param theme `Theme` object.
#' @param margin List: Plot margins.
#' @param filename Character: file name to save the plot. Default is NULL.
#' @param file_width Numeric: width of the file. Default is 500.
#' @param file_height Numeric: height of the file. Default is 500.
#' @param file_scale Numeric: scale of the file. Default is 1.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Assume positive class is "b"
#' true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
#' predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a"))
#' predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)
#' metrics <- classification_metrics(true_labels, predicted_labels, predicted_prob)
#' draw_confusion(metrics)
draw_confusion <- function(
  x,
  xlab = "Predicted",
  ylab = "Reference",
  true_col = "#43A4AC",
  false_col = "#FA9860",
  font_size = 18,
  main = NULL,
  main_y = 1,
  main_yanchor = "bottom",
  theme = choose_theme(getOption("rtemis_theme")),
  margin = list(l = 20, r = 5, b = 5, t = 20),
  # write to file
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
) {
  # Input ----
  if (S7_inherits(x, ClassificationMetrics)) {
    x <- x@metrics[["Confusion_Matrix"]]
  }

  if (is.null(dim(x)) || ncol(x) != nrow(x)) {
    cli::cli_abort("The confusion matrix must be a square matrix.")
  }

  # Metrics ----
  nclasses <- ncol(x)
  total <- sum(x)
  class_totals <- rowSums(x)
  condition_negative <- total - class_totals
  predicted_totals <- colSums(x)
  hits <- diag(x)
  # misses = class_totals - hits
  class_sensitivity <- hits / class_totals
  true_negative <- total - predicted_totals - (class_totals - hits)
  class_specificity <- true_negative / condition_negative
  class_balancedAccuracy <- .5 * (class_sensitivity + class_specificity)
  # PPV = true positive / predicted condition positive
  class_ppv <- hits / predicted_totals
  # NPV  = true negative / predicted condition negative
  class_npv <- true_negative / (total - predicted_totals)

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  # Colors ----
  pos_color <- colorRamp(colors = c(theme[["bg"]], true_col))
  neg_color <- colorRamp(colors = c(theme[["bg"]], false_col))

  # Fonts ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["labs_col"]]
  )

  # Plot ----
  plt <- plotly::plot_ly(
    type = "scatter",
    mode = "lines"
  )

  # Add colored tiles & counts ----
  for (i in seq_len(nclasses)) {
    for (j in seq_len(nclasses)) {
      plt <- make_plotly_conf_tile(
        p = plt,
        x = x,
        i = i,
        j = j,
        pos_color = pos_color,
        neg_color = neg_color,
        font_size = font_size,
        theme = theme
      )
    }
  }

  # Layout ----
  plt <- plotly::layout(
    plt,
    xaxis = list(
      side = "above",
      showticklabels = FALSE,
      showgrid = FALSE,
      zeroline = FALSE
    ),
    yaxis = list(
      showticklabels = FALSE,
      showgrid = FALSE,
      zeroline = FALSE,
      autorange = "reversed",
      scaleanchor = "x",
      scaleratio = 1
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]],
      yref = "paper",
      y = main_y,
      yanchor = main_yanchor
    ),
    showlegend = FALSE,
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin
  ) # /layout

  # Class labels ----
  # Add class labels above and to the left of the plot
  # Left
  plt <- plotly::add_annotations(
    plt,
    x = rep(-0.125, nclasses),
    y = seq_len(nclasses) - 0.5,
    text = colnames(x),
    # textposition = "middle right",
    font = f,
    showarrow = FALSE,
    textangle = -90
  )
  # Above
  plt <- plotly::add_annotations(
    plt,
    x = seq_len(nclasses) - 0.5,
    y = rep(-0.125, nclasses),
    text = colnames(x),
    # textposition = "bottom center",
    font = f,
    showarrow = FALSE
  )

  # x-axis label "Predicted"
  plt <- plotly::add_annotations(
    plt,
    x = nclasses / 2,
    y = ifelse(nclasses == 2, -.3, -0.5),
    text = xlab,
    font = f,
    showarrow = FALSE
  )

  # y-axis label "Reference"
  plt <- plotly::add_annotations(
    plt,
    x = ifelse(nclasses == 2, -.3, -0.5),
    y = nclasses / 2,
    text = ylab,
    font = f,
    showarrow = FALSE,
    textangle = -90
  )

  # Metrics ----
  if (nclasses == 2) {
    # Sens./Spec. ----
    # Rect: Sens./Spec. bg
    plt <- plotly::add_trace(
      plt,
      x = c(nclasses, nclasses + 0.3, nclasses + 0.3, nclasses),
      y = c(0, 0, nclasses, nclasses),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075),
      showlegend = FALSE
    )

    # Text: Sens. & Spec.
    plt <- plotly::add_annotations(
      plt,
      x = rep(nclasses + 0.15, 2),
      y = c(.5, 1.5),
      text = paste0(
        c("Sensitivity\n", "Specificity\n"),
        c(ddSci(class_sensitivity[1], 3), ddSci(class_specificity[1], 3))
      ),
      font = f,
      showarrow = FALSE,
      textangle = -90
    )

    # PPV/NPV ----
    # Rect: PPV/NPV bg
    plt <- plotly::add_trace(
      plt,
      x = c(0, nclasses, nclasses, 0, 0),
      y = c(nclasses, nclasses, nclasses + .3, nclasses + .3, nclasses),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075),
      showlegend = FALSE
    )

    # Text: PPV & NPV
    plt <- plotly::add_annotations(
      plt,
      x = c(.5, 1.5),
      y = rep(nclasses + 0.15, 2),
      text = paste0(
        c("PPV\n", "NPV\n"),
        c(ddSci(class_ppv[1], 3), ddSci(class_npv[1], 3))
      ),
      font = f,
      showarrow = FALSE
    )
  } else {
    # PPV ----
    # Text: "PPV" at bottom left corner
    plt <- plotly::add_annotations(
      plt,
      x = -0.05,
      y = nclasses + .1,
      xanchor = "right",
      yanchor = "middle",
      text = "PPV",
      font = f,
      showarrow = FALSE
    )

    # Rect: PPV bg
    plt <- plotly::add_trace(
      plt,
      x = c(0, nclasses, nclasses, 0, 0),
      y = c(nclasses, nclasses, nclasses + 0.2, nclasses + 0.2, nclasses),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075),
      showlegend = FALSE
    )

    # Text: Per-class PPV
    for (i in seq_len(nclasses)) {
      plt <- plotly::add_annotations(
        plt,
        x = i - 0.5,
        y = nclasses + .1,
        text = ddSci(class_ppv[i], 3),
        font = f,
        showarrow = FALSE
      )
    }

    # NPV ----
    # Label: "NPV" at bottom left corner
    plt <- plotly::add_annotations(
      plt,
      x = -0.05,
      y = nclasses + .3,
      xanchor = "right",
      yanchor = "middle",
      text = "NPV",
      font = f,
      showarrow = FALSE
    )

    # Rect: NPV bg
    plt <- plotly::add_trace(
      plt,
      x = c(0, nclasses, nclasses, 0, 0),
      y = c(
        nclasses + 0.2,
        nclasses + 0.2,
        nclasses + 0.4,
        nclasses + 0.4,
        nclasses + 0.2
      ),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .05),
      showlegend = FALSE
    )

    # Text: Per-class NPV
    for (i in seq_len(nclasses)) {
      plt <- plotly::add_annotations(
        plt,
        x = i - 0.5,
        y = nclasses + .3,
        text = ddSci(class_npv[i], 3),
        font = f,
        showarrow = FALSE
      )
    }

    # Sensitivity ----
    # Label: "Sens." top right vertically
    plt <- plotly::add_annotations(
      plt,
      x = nclasses + 0.1,
      y = -.05,
      yanchor = "bottom",
      text = "Sens.",
      font = f,
      showarrow = FALSE,
      textangle = -90
    )

    # Rect: Sens. bg
    plt <- plotly::add_trace(
      plt,
      x = c(nclasses, nclasses + 0.2, nclasses + 0.2, nclasses),
      y = c(0, 0, nclasses, nclasses),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .075),
      showlegend = FALSE
    )

    # Text: Per-class Sens.
    for (i in seq_len(nclasses)) {
      plt <- plotly::add_annotations(
        plt,
        x = nclasses + 0.1,
        y = i - 0.5,
        text = ddSci(class_sensitivity[i], 3),
        font = f,
        showarrow = FALSE,
        textangle = -90
      )
    }

    # Specificity ----
    # Label: "Spec." top right vertically
    plt <- plotly::add_annotations(
      plt,
      x = nclasses + 0.3,
      y = -.05,
      yanchor = "bottom",
      text = "Spec.",
      font = f,
      showarrow = FALSE,
      textangle = -90
    )

    # Rect: Spec. bg
    plt <- plotly::add_trace(
      plt,
      x = c(nclasses + 0.2, nclasses + 0.4, nclasses + 0.4, nclasses + 0.2),
      y = c(0, 0, nclasses, nclasses),
      line = list(color = "transparent"),
      fill = "toself",
      fillcolor = plotly::toRGB(theme[["fg"]], alpha = .05),
      showlegend = FALSE
    )

    # Text: Per-class Spec.
    for (i in seq_len(nclasses)) {
      plt <- plotly::add_annotations(
        plt,
        x = nclasses + 0.3,
        y = i - 0.5,
        text = ddSci(class_specificity[i], 3),
        font = f,
        showarrow = FALSE,
        textangle = -90
      )
    }
  }

  # Balanced Accuracy ----
  # Rect: BA bg
  ba_pad <- ifelse(nclasses == 2, 0.3, 0.4)
  plt <- plotly::add_trace(
    plt,
    x = c(nclasses, nclasses + ba_pad, nclasses + ba_pad, nclasses),
    y = c(nclasses, nclasses, nclasses + ba_pad, nclasses + ba_pad),
    line = list(color = "transparent"),
    fill = "toself",
    fillcolor = plotly::toRGB(theme[["fg"]], alpha = .025),
    showlegend = FALSE
  )

  # Text: Balanced accuracy
  ba_pad <- ifelse(nclasses == 2, 0.15, 0.2)
  ba <- ifelse(
    nclasses == 2,
    class_balancedAccuracy[1],
    mean(class_balancedAccuracy)
  )
  plt <- plotly::add_annotations(
    plt,
    x = nclasses + ba_pad,
    y = nclasses + ba_pad,
    xanchor = "center",
    yanchor = "middle",
    text = paste0("BA\n", ddSci(ba, 3)),
    font = f,
    showarrow = FALSE
  )

  # Disable hoverinfo
  plt <- plotly::style(plt, hoverinfo = "none")

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  return(plt)
} # /rtemis::draw_confusion


#' Make plotly confusion matrix tile
#'
#' @author EDG
#' @keywords internal
#' @noRd
make_plotly_conf_tile <- function(
  p,
  x,
  i,
  j,
  pos_color,
  neg_color,
  font_size,
  theme,
  xref = "x",
  yref = "y"
) {
  val <- x[i, j] / sum(x[i, ])
  col <- if (i == j) {
    pos_color(val)
  } else {
    neg_color(val)
  }
  col <- rgb(col[1], col[2], col[3], maxColorValue = 255)
  # Add colored tile
  p <- plotly::add_trace(
    p,
    x = c(j - 1, j - 1, j, j, j - 1),
    y = c(i, i - 1, i - 1, i, i),
    line = list(color = "transparent"),
    fill = "toself",
    fillcolor = col
  )
  # Add text
  p <- plotly::add_trace(
    p,
    x = j - 0.5,
    y = i - 0.5,
    mode = "text",
    text = paste0("<b>", x[i, j], "</b>"),
    textposition = "middle center",
    textfont = list(
      family = theme[["font_family"]],
      color = ifelse(val > 0.5, theme[["bg"]], theme[["fg"]]),
      size = font_size
    ),
    showlegend = FALSE
  )

  return(p)
} # /rtemis::make_plotly_conf_tile


================================================
FILE: R/draw_dist.R
================================================
# draw_dist.R
# ::rtemis::
# 2019- EDG rtemis.org

# check whether list is reordered with ridge

#' Draw Distributions using Histograms and Density Plots
#'
#' Draw Distributions using Histograms and Density Plots using `plotly`.
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' If input is data.frame, non-numeric variables will be removed.
#'
#' @param x Numeric vector / data.frame / list: Input. If not a vector, each column / each element is drawn.
#' @param type Character: "density" or "histogram".
#' @param mode Character: "overlap", "ridge". How to plot different groups; on the same axes ("overlap"), or on separate plots with the same x-axis ("ridge").
#' @param group Vector: Will be converted to factor; levels define group members.
#' @param main Character: Main title for the plot.
#' @param xlab Character: Label for the x-axis.
#' @param ylab Character: Label for the y-axis.
#' @param col Color: Colors for the plot.
#' @param alpha Numeric: Alpha transparency for plot elements.
#' @param plot_bg Color: Background color for plot area.
#' @param theme `Theme` object.
#' @param palette Character: Color palette to use.
#' @param axes_square Logical: If TRUE, draw a square plot to fill the graphic device. Default = FALSE.
#' @param group_names Character: Names for the groups.
#' @param font_size Numeric: Font size for plot text.
#' @param font_alpha Numeric: Alpha transparency for font.
#' @param legend Logical: If TRUE, draw legend. Default = NULL, which will be set to TRUE if x is a list of more than 1 element.
#' @param legend_xy Numeric, vector, length 2: Relative x, y position for legend. Default = c(0, 1).
#' @param legend_col Color: Color for the legend text.
#' @param legend_bg Color: Background color for legend.
#' @param legend_border_col Color: Border color for legend.
#' @param bargap Numeric: The gap between adjacent histogram bars in plot fraction.
#' @param vline Numeric, vector: If defined, draw a vertical line at this x value(s).
#' @param vline_col Color: Color for `vline`.
#' @param vline_width Numeric: Width for `vline`.
#' @param vline_dash Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot".
#' @param text Character: If defined, add this text over the plot.
#' @param text_x Numeric: x-coordinate for `text`.
#' @param text_xref Character: "x": `text_x` refers to plot's x-axis; "paper": `text_x` refers to plotting area from 0-1.
#' @param text_xanchor Character: "auto", "left", "center", "right".
#' @param text_y Numeric: y-coordinate for `text`.
#' @param text_yref Character: "y": `text_y` refers to plot's y-axis; "paper": `text_y` refers to plotting area from 0-1.
#' @param text_yanchor Character: "auto", "top", "middle", "bottom".
#' @param text_col Color: Color for `text`.
#' @param margin List: Margins for the plot.
#' @param automargin_x Logical: If TRUE, automatically adjust x-axis margins.
#' @param automargin_y Logical: If TRUE, automatically adjust y-axis margins.
#' @param zerolines Logical: If TRUE, draw lines at y = 0.
#' @param density_kernel Character: Kernel to use for density estimation.
#' @param density_bw Character: Bandwidth to use for density estimation.
#' @param histnorm Character: NULL, "percent", "probability", "density", "probability density".
#' @param histfunc Character: "count", "sum", "avg", "min", "max".
#' @param hist_n_bins Integer: Number of bins to use if type = "histogram".
#' @param barmode Character: Barmode for histogram. One of "overlay", "stack", "relative", "group".
#' @param ridge_sharex Logical: If TRUE, draw single x-axis when `mode = "ridge"`.
#' @param ridge_y_labs Logical: If TRUE, show individual y labels when `mode = "ridge"`.
#' @param ridge_order_on_mean Logical: If TRUE, order groups by mean value when `mode = "ridge"`.
#' @param displayModeBar Logical: If TRUE, display the mode bar.
#' @param modeBar_file_format Character: File format for mode bar. Default = "svg".
#' @param width Numeric: Force plot size to this width. Default = NULL, i.e. fill available space.
#' @param height Numeric: Force plot size to this height. Default = NULL, i.e. fill available space.
#' @param filename Character: Path to file to save static plot.
#' @param file_width Integer: File width in pixels for when `filename` is set.
#' @param file_height Integer: File height in pixels for when `filename` is set.
#' @param file_scale Numeric: If saving to file, scale plot by this number.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Will automatically use only numeric columns
#' draw_dist(iris)
#' draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]])
draw_dist <- function(
  x,
  type = c("density", "histogram"),
  mode = c("overlap", "ridge"),
  group = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  col = NULL,
  alpha = .75,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  font_alpha = .8,
  legend = NULL,
  legend_xy = c(0, 1),
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  bargap = .05,
  vline = NULL,
  vline_col = theme[["fg"]],
  vline_width = 1,
  vline_dash = "dot",
  text = NULL,
  text_x = 1,
  text_xref = "paper",
  text_xanchor = "left",
  text_y = 1,
  text_yref = "paper",
  text_yanchor = "top",
  text_col = theme[["fg"]],
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  zerolines = FALSE,
  density_kernel = "gaussian",
  density_bw = "SJ",
  histnorm = c(
    "",
    "density",
    "percent",
    "probability",
    "probability density"
  ),
  histfunc = c("count", "sum", "avg", "min", "max"),
  hist_n_bins = 20,
  barmode = "overlay", # ?alternatives
  ridge_sharex = TRUE,
  ridge_y_labs = FALSE,
  ridge_order_on_mean = TRUE,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  width = NULL,
  height = NULL,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  type <- match.arg(type)
  mode <- match.arg(mode)
  if (!is.null(main)) {
    main <- paste0("<b>", main, "</b>")
  }
  .xname <- labelify(deparse(substitute(x)))

  # Data ----

  # '- Group ----
  if (!is.null(group)) {
    if (is.factor(group)) {
      group <- droplevels(group)
    } else {
      group <- as.factor(group)
    }
    x <- as.data.frame(x)
    # Can't have multiple vectors in `x` and `group`
    if (length(x) > 1 && !is.null(group)) {
      cli::cli_abort(
        "Can't have both multiple vectors in `x` and `group` defined."
      )
    }
    x <- split(x, group)
    x <- sapply(x, as.vector)
    if (is.null(group_names)) {
      group_names <- levels(group)
    }
    names(x) <- .names <- group_names
  }

  if (!is.list(x)) {
    x <- list(x)
  }
  n_groups <- length(x)
  if (n_groups == 1 && is.null(xlab)) {
    xlab <- .xname
  }

  # Remove non-numeric vectors
  which_nonnum <- which(sapply(x, function(i) !is.numeric(i)))
  if (length(which_nonnum) > 0) {
    for (i in rev(which_nonnum)) {
      x[[i]] <- NULL
    }
  }

  if (is.null(legend)) {
    legend <- length(x) > 1
  }
  if (!is.null(group_names)) {
    .names <- group_names
  } else {
    .names <- labelify(names(x))
  }
  if (is.null(.names)) {
    .names <- paste("Feature", seq_along(x))
  }

  # Colors ----
  if (is.character(palette)) {
    palette <- get_palette(palette)
  }
  n_groups <- length(x)
  if (is.null(col)) {
    col <- recycle(palette, seq(n_groups))[seq(n_groups)]
  }

  if (length(col) < n_groups) {
    col <- rep(col, n_groups / length(col))
  }

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])
  if (!theme[["axes_visible"]]) {
    tick_col <- labs_col <- "transparent"
  }

  # '- Axis font ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )

  # '- Tick font ----
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["tick_labels_col"]]
  )

  # Derived
  if (is.null(legend_col)) {
    legend_col <- labs_col
  }

  # Size ----
  if (axes_square) {
    width <- height <- min(dev.size("px")) - 10
  }

  # Ridge ----
  if (mode == "ridge") {
    axis <- list(
      showline = FALSE,
      # mirror = axes_mirrored,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = zerolines
    )
    ridge_groups <- if (ridge_order_on_mean) {
      order(sapply(x, mean), decreasing = TRUE)
    } else {
      seq_len(n_groups)
    }
  }

  # plotly ----
  # z <- if (mode == "overlap") rep(1, n_groups) else seq_len(n_groups)
  # plt <- vector("list", n_groups)

  .text <- lapply(x, function(i) {
    paste(
      "mean =",
      ddSci(mean(i, na.rm = TRUE)),
      "\nsd =",
      ddSci(sd(i, na.rm = TRUE))
    )
  })

  # '- { Density } ----
  if (type == "density") {
    if (is.null(ylab)) {
      ylab <- "Density"
    }
    xl_density <- lapply(
      x,
      density,
      na.rm = TRUE,
      kernel = density_kernel,
      bw = density_bw
    )

    if (mode == "overlap") {
      # '- Density overlap ----
      plt <- plotly::plot_ly(
        width = width,
        height = height
      )
      for (i in seq_len(n_groups)) {
        plt <- plotly::add_trace(
          plt,
          x = xl_density[[i]][["x"]],
          y = xl_density[[i]][["y"]],
          type = "scatter",
          mode = "none",
          fill = "tozeroy",
          fillcolor = plotly::toRGB(col[[i]], alpha),
          name = .names[i],
          hovertext = .text[[i]],
          hoverinfo = "text",
          showlegend = legend
        )
      }
    } else {
      # '- Density ridge ----
      plt <- lapply(ridge_groups, function(i) {
        plotly::plot_ly(
          x = xl_density[[i]][["x"]],
          y = xl_density[[i]][["y"]],
          type = "scatter",
          mode = "none",
          fill = "tozeroy",
          fillcolor = plotly::toRGB(col[[i]], alpha),
          name = .names[i],
          hovertext = .text[[i]],
          hoverinfo = "text",
          showlegend = legend,
          width = width,
          height = height
        ) |>
          plotly::layout(
            xaxis = axis,
            yaxis = c(
              list(
                title = list(
                  text = .names[i],
                  font = f
                )
              ),
              axis
            )
          )
      })
    }
  } # End mode == "density"

  # '- { Histogram } ----
  if (type == "histogram") {
    # https://plotly.com/r/reference/#histogram-bingroup
    bingroup <- 1
    histnorm <- match.arg(histnorm)
    histfunc <- match.arg(histfunc)
    # if (is.null(ylab)) ylab <- "Count"

    if (mode == "overlap") {
      # '-  Histogram overlap ----
      plt <- plotly::plot_ly(
        width = width,
        height = height
      )
      for (i in seq_len(n_groups)) {
        plt <- plotly::add_trace(
          plt,
          x = x[[i]],
          type = "histogram",
          marker = list(color = plotly::toRGB(col[i], alpha)),
          name = .names[i],
          hovertext = .text[[i]],
          hoverinfo = "text",
          histnorm = histnorm,
          histfunc = histfunc,
          nbinsx = hist_n_bins,
          showlegend = legend,
          bingroup = bingroup
        )
      }
      plt <- plotly::layout(plt, barmode = barmode, bargap = bargap)
    } else {
      # '- Histogram ridge ----
      plt <- lapply(ridge_groups, function(i) {
        plotly::plot_ly(
          x = x[[i]],
          type = "histogram",
          histnorm = histnorm,
          histfunc = histfunc,
          nbinsx = hist_n_bins,
          marker = list(color = plotly::toRGB(col[i], alpha)),
          name = .names[i],
          hovertext = .text[[i]],
          hoverinfo = "text",
          showlegend = legend,
          width = width,
          height = height,
          bingroup = bingroup
        ) |>
          plotly::layout(
            xaxis = axis,
            yaxis = c(
              list(
                title = list(
                  text = .names[i],
                  font = f
                )
              ),
              axis
            ),
            bargap = bargap
          )
      })
    }
  }

  if (mode == "ridge") {
    plt <- plotly::subplot(
      plt,
      nrows = n_groups,
      shareX = ridge_sharex,
      # shareY = ridge_sharey,
      titleY = ridge_y_labs
    )
  }

  # Layout ----
  zerocol <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]])
  # '- layout ----
  .legend <- list(
    x = legend_xy[1],
    y = legend_xy[2],
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    bgcolor = legend_bg,
    bordercolor = legend_border_col
  )

  plt <- plotly::layout(
    plt,
    xaxis = list(
      title = list(
        text = xlab,
        font = f
      ),
      showline = FALSE,
      # mirror = axes_mirrored,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = FALSE,
      automargin = automargin_x
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    showlegend = legend,
    legend = .legend
  )

  if (mode == "overlap") {
    plt <- plotly::layout(
      plt,
      yaxis = list(
        title = list(
          text = ylab,
          font = f
        ),
        showline = FALSE,
        # mirror = axes_mirrored,
        showgrid = theme[["grid"]],
        gridcolor = grid_col,
        gridwidth = theme[["grid_lwd"]],
        tickcolor = tick_col,
        tickfont = tickfont,
        zeroline = zerolines,
        zerolinecolor = zerocol,
        zerolinewidth = theme[["zerolines_lwd"]],
        automargin = automargin_y
      )
    )
  }

  # vline ----
  if (!is.null(vline)) {
    plt <- plotly::layout(
      plt,
      shapes = plotly_vline(
        vline,
        color = vline_col,
        width = vline_width,
        dash = vline_dash
      )
    )
  }

  # text ----
  if (!is.null(text)) {
    plt <- plotly::layout(
      plt,
      annotations = list(
        text = text,
        x = text_x,
        xref = text_xref,
        xanchor = text_xanchor,
        y = text_y,
        yref = text_yref,
        yanchor = text_yanchor,
        font = list(
          color = text_col,
          family = theme[["font_family"]],
          size = font_size
        ),
        showarrow = FALSE
      )
    )
  }

  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_dist


================================================
FILE: R/draw_graphd3.R
================================================
# draw_graphD3
# ::rtemis::
# EDG rtemis.org

#' Plot graph using \pkg{networkD3}
#'
#' @param net \pkg{igraph} network.
#' @param groups Vector, length n nodes indicating group/cluster/community membership of nodes in `net`.
#' @param color_scale D3 colorscale (e.g. `networkD3::JS("d3.scaleOrdinal(d3.schemeCategory20b);")`).
#' @param edge_col Color for edges.
#' @param node_col Color for nodes.
#' @param node_alpha Float \[0, 1\]: Node opacity.
#' @param edge_alpha Float \[0, 1\]: Edge opacity.
#' @param zoom Logical: If TRUE, graph is zoomable.
#' @param legend Logical: If TRUE, display legend for groups.
#' @param palette Character vector: Colors to use.
#' @param theme `Theme` object.
#' @param ... Additional arguments to pass to `networkD3`.
#'
#' @return `forceNetwork` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' library(igraph)
#' g <- make_ring(10)
#' draw_graphD3(g)
draw_graphD3 <- function(
  net,
  groups = NULL,
  color_scale = NULL,
  edge_col = NULL,
  node_col = NULL,
  node_alpha = .5,
  edge_alpha = .33,
  zoom = TRUE,
  legend = FALSE,
  palette = get_palette(getOption("rtemis_palette")),
  theme = choose_theme(getOption("rtemis_theme")),
  ...
) {
  # Dependencies ----
  check_dependencies("networkD3")

  # Theme ----
  check_is_S7(theme, Theme)

  netd3 <- networkD3::igraph_to_networkD3(net)
  if (is.null(groups)) {
    netd3[["nodes"]][["group"]] <- "A"
  } else {
    netd3[["nodes"]][["group"]] <- groups
  }

  # Colors ----
  if (is.null(node_col) && length(unique(netd3[["nodes"]][["group"]])) == 1) {
    node_col <- theme[["fg"]]
  }

  if (is.null(color_scale)) {
    if (length(unique(netd3[["nodes"]][["group"]])) == 1) {
      color_scale <- paste0(
        'd3.scaleOrdinal().domain(["A"]).range(["',
        adjustcolor(node_col, node_alpha),
        '"]);'
      )
    } else {
      if (is.character(palette)) {
        palette <- adjustcolor(unlist(get_palette(palette)), node_alpha)
      }
      ngroups <- length(unique(groups))
      .groups <- paste0(sort(unique(groups)), collapse = '", "')
      if (ngroups > length(palette)) {
        palette <- rep(palette, ngroups / length(palette))
      }
      .colors <- paste0(palette[seq(ngroups)], collapse = '", "')
      color_scale <- paste0(
        'd3.scaleOrdinal().domain(["',
        .groups,
        '"]).range(["',
        .colors,
        '"]);'
      )
    }
  }

  if (is.null(edge_col)) {
    if (is.null(groups)) {
      edge_col <- adjustcolor("#18A3AC", edge_alpha)
    } else {
      edge_col <- adjustcolor(theme[["fg"]], edge_alpha)
    }
  } else {
    edge_col <- adjustcolor(edge_col, edge_alpha)
  }

  # Plot ----
  fn <- networkD3::forceNetwork(
    Links = netd3[["links"]],
    Nodes = netd3[["nodes"]],
    Source = "source",
    Target = "target",
    NodeID = "name",
    Group = "group",
    colourScale = color_scale,
    linkColour = edge_col,
    opacity = 1,
    legend = legend,
    zoom = zoom
  )

  # fn$x$nodes$border <- border.groups
  fn <- htmlwidgets::onRender(
    fn,
    'function(el, x) { d3.selectAll("circle").style("stroke", d => "#ffffff00"); }'
  )

  fn
} # /rtemis::draw_graphD3


================================================
FILE: R/draw_graphjs.R
================================================
# draw_graphjs.R
# ::rtemis::
# EDG rtemis.org

#' Plot network using \pkg{threejs::graphjs}
#'
#' Interactive plotting of an \pkg{igraph} net using \pkg{threejs}.
#'
#' @param net \pkg{igraph} network.
#' @param vertex_size Numeric: Vertex size.
#' @param vertex_col Color for vertices.
#' @param vertex_label_col Color for vertex labels.
#' @param vertex_label_alpha Numeric: Transparency for `vertex_label_col`.
#' @param vertex_frame_col Color for vertex border (frame).
#' @param vertex_label Character vector: Vertex labels. Default = NULL, which will keep existing names in `net` if any. Set to NA to avoid printing vertex labels.
#' @param vertex_shape Character, vector, length 1 or N nodes: Vertex shape. See `graphjs("vertex_shape")`.
#' @param edge_col Color for edges.
#' @param edge_alpha Numeric: Transparency for edges.
#' @param edge_curved Numeric: Curvature of edges.
#' @param edge_width Numeric: Edge thickness.
#' @param layout Character: one of: "fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama", corresponding to all the available layouts in \pkg{igraph}.
#' @param coords Output of precomputed \pkg{igraph} layout. If provided, `layout` is ignored.
#' @param layout_args List of arguments to pass to `layout` function.
#' @param cluster Character: one of: "edge_betweenness", "fast_greedy", "infomap", "label_prop", "leading_eigen", "louvain", "optimal", "spinglass", "walktrap", corresponding to all the available \pkg{igraph} clustering functions.
#' @param groups Output of precomputed \pkg{igraph} clustering. If provided, `cluster` is ignored.
#' @param cluster_config List of arguments to pass to `cluster` function.
#' @param cluster_mark_groups Logical: If TRUE, draw polygons to indicate clusters, if `groups` or `cluster` are defined.
#' @param cluster_color_vertices Logical: If TRUE, color vertices by cluster membership.
#' @param main Character: Main title.
#' @param theme `Theme` object.
#' @param palette Color vector or name of rtemis palette.
#' @param mar Numeric vector, length 4: `par`'s margin argument.
#' @param filename Character: If provided, save plot to this filepath.
#' @param verbosity Integer: Verbosity level.
#' @param ... Extra arguments to pass to `igraph::plot.igraph()`.
#'
#' @return `scatterplotThree` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' library(igraph)
#' g <- make_ring(10)
#' draw_graphjs(g)
draw_graphjs <- function(
  net,
  vertex_size = 1,
  vertex_col = NULL,
  vertex_label_col = NULL,
  vertex_label_alpha = .66,
  vertex_frame_col = NA,
  vertex_label = NULL,
  vertex_shape = "circle",
  edge_col = NULL,
  edge_alpha = .5,
  edge_curved = .35,
  edge_width = 2,
  layout = c(
    "fr",
    "dh",
    "drl",
    "gem",
    "graphopt",
    "kk",
    "lgl",
    "mds",
    "sugiyama"
  ),
  coords = NULL,
  layout_args = list(),
  cluster = NULL,
  groups = NULL,
  cluster_config = list(),
  cluster_mark_groups = TRUE,
  cluster_color_vertices = FALSE,
  main = "",
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  mar = rep(0, 4),
  filename = NULL,
  verbosity = 1L,
  ...
) {
  # Dependencies ----
  check_dependencies("igraph", "threejs")

  # Theme ----
  check_is_S7(theme, Theme)

  if (is.character(palette)) {
    palette <- unname(unlist(get_palette(palette)))
  }

  # Vertex names ----
  # by default use names in input net.
  if (!is.null(vertex_label)) {
    igraph::igraph.options(net, vertex_label = vertex_label)
  }

  # Layout ----
  layout <- match.arg(layout)
  if (is.null(coords) && !is.null(layout)) {
    coords <- do.call(
      getFromNamespace(paste0("layout_with_", layout), "igraph"),
      c(list(net, dim = 3), layout_args)
    )
    if (layout == "sugiyama") coords <- coords[["layout"]]
  }

  # Cluster ----
  if (is.null(groups) && !is.null(cluster)) {
    groups <- do.call(
      getFromNamespace(paste0("cluster_", cluster), "igraph"),
      c(list(net), cluster_config)
    )
  }

  if (is.null(vertex_col)) {
    vertex_col <- if (!is.null(groups)) {
      palette <- recycle(palette, length(unique(groups[["membership"]])))
      palette[groups[["membership"]]]
    } else {
      theme[["fg"]]
    }
  }

  if (is.null(vertex_label_col)) {
    vertex_label_col <- theme[["fg"]]
  }
  vertex_label_col <- adjustcolor(vertex_label_col, vertex_label_alpha)

  # Leave edge_col as NULL for auto-coloring with groups
  if (is.null(edge_col) && is.null(groups)) {
    edge_col <- "#18A3AC"
  }

  # Plot ----
  threejs::graphjs(
    net,
    layout = coords,
    vertex.color = vertex_col,
    vertex.size = vertex_size,
    vertex.shape = vertex_shape,
    vertex.label = vertex_label,
    edge.color = edge_col,
    edge.alpha = edge_alpha,
    edge.width = edge_width,
    main = main,
    bg = theme[["bg"]],
    vertex.label.color = vertex_label_col,
    vertex.frame.color = vertex_frame_col,
    edge.curved = edge_curved,
    vertex.label.family = theme[["font_family"]],
    font.main = theme[["font_family"]],
    stroke = NULL,
    verbosity = verbosity,
    ...
  )
} # /rtemis::draw_graphjs


================================================
FILE: R/draw_heatmap.R
================================================
# draw_heatmap.R
# ::rtemis::
# 2017 EDG rtemis.org

#' Interactive Heatmaps
#'
#' Draw interactive heatmaps using `heatmaply`.
#'
#' @details
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#' 'heatmaply' unfortunately forces loading of the 'colorspace' namespace.
#'
#' @param x Input matrix.
#' @param Rowv Logical or dendrogram. If Logical: Compute dendrogram and reorder rows. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at `heatmaply::heatmaply("Rowv")`.
#' @param Colv Logical or dendrogram. If Logical: Compute dendrogram and reorder columns. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at `heatmaply::heatmaply("Colv")`.
#' @param cluster Logical: If TRUE, set `Rowv` and `Colv` to TRUE.
#' @param symm Logical: If TRUE, treat `x` symmetrically - `x` must be a square matrix.
#' @param cellnote Matrix with values to be displayed on hover. Defaults to `ddSci(x)`.
#' @param colorgrad_n Integer: Number of colors in gradient. Default = 101.
#' @param colors Character vector: Colors to use in gradient.
#' @param space Character: Color space to use. Default = "rgb".
#' @param lo Character: Color for low values. Default = "#18A3AC".
#' @param lomid Character: Color for low-mid values.
#' @param mid Character: Color for mid values.
#' @param midhi Character: Color for mid-high values.
#' @param hi Character: Color for high values. Default = "#F48024".
#' @param k_row Integer: Number of desired number of groups by which to color dendrogram branches in the rows. Default = 1.
#' @param k_col Integer: Number of desired number of groups by which to color dendrogram branches in the columns. Default = 1.
#' @param grid_gap Integer: Space between cells. Default = 0 (no space).
#' @param limits Float, length 2: Determine color range. Default = NULL, which automatically centers values around 0.
#' @param margins Float, length 4: Heatmap margins.
#' @param main Character: Main title.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param key_title Character: Title for the color key.
#' @param showticklabels Logical: If TRUE, show tick labels.
#' @param colorbar_len Numeric: Length of the colorbar.
#' @param row_side_colors Data frame: Column names will be label names, cells should be label colors. See `heatmaply::heatmaply("row_side_colors")`.
#' @param row_side_palette Color palette function. See `heatmaply::heatmaply("row_side_palette")`.
#' @param col_side_colors Data frame: Column names will be label names, cells should be label colors. See `heatmaply::heatmaply("col_side_colors")`.
#' @param col_side_palette Color palette function. See `heatmaply::heatmaply("col_side_palette")`.
#' @param font_size Numeric: Font size.
#' @param padding Numeric: Padding between cells.
#' @param displayModeBar Logical: If TRUE, display the plotly mode bar.
#' @param modeBar_file_format Character: File format for image exports from the mode bar.
#' @param filename Character: File name to save the plot.
#' @param file_width Numeric: Width of exported image.
#' @param file_height Numeric: Height of exported image.
#' @param file_scale Numeric: Scale of exported image.
#' @param plot_method Character: Plot method to use. Default = "plotly".
#' @param theme `Theme` object.
#' @param ... Additional arguments to be passed to `heatmaply::heatmaply`.
#'
#' @return `plotly` object.`
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' x <- rnormmat(200, 20)
#' xcor <- cor(x)
#' draw_heatmap(xcor)
draw_heatmap <- function(
  x,
  Rowv = TRUE,
  Colv = TRUE,
  cluster = FALSE,
  symm = FALSE,
  cellnote = NULL,
  colorgrad_n = 101,
  colors = NULL,
  space = "rgb",
  lo = "#18A3AC",
  lomid = NULL,
  mid = NULL,
  midhi = NULL,
  hi = "#F48024",
  k_row = 1,
  k_col = 1,
  grid_gap = 0,
  limits = NULL,
  margins = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  key_title = NULL,
  showticklabels = NULL,
  colorbar_len = .7,
  plot_method = "plotly",
  theme = choose_theme(getOption("rtemis_theme")),
  row_side_colors = NULL,
  row_side_palette = NULL,
  col_side_colors = NULL,
  col_side_palette = NULL,
  font_size = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
) {
  # Dependencies ----
  check_dependencies("heatmaply")

  # Colnames ----
  if (is.null(colnames(x))) {
    colnames(x) <- seq_len(NCOL(x))
  }
  if (is.null(rownames(x))) {
    rownames(x) <- seq_len(NROW(x))
  }

  # Margins ----
  # By default, allow 7 px per character
  if (is.null(margins)) {
    bottom <- max(nchar(colnames(x))) * 7 + 15
    left <- max(nchar(rownames(x))) * 7 + 10
    margins <- c(bottom, left, 50, 50)
  }

  # Tick Labels ----
  if (is.null(showticklabels)) {
    showticklabels <- c(
      ifelse(NCOL(x) < 50, TRUE, FALSE),
      ifelse(NROW(x) < 50, TRUE, FALSE)
    )
  }

  if (is.null(font_size)) {
    font_size <- 17.0769 - 0.2692 * ncol(x)
  }

  # Limits ----
  if (is.null(limits)) {
    maxabs <- max(abs(x), na.rm = TRUE)
    if (.2 < maxabs && maxabs < 1) {
      maxabs <- 1
    }
    limits <- c(-maxabs, maxabs)
  }

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  fg <- plotly::toRGB(theme[["fg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  tick_labels_col <- plotly::toRGB(theme[["tick_labels_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  # Colors ----
  if (is.null(mid)) {
    mid <- theme[["bg"]]
  }
  colors <- colorgrad(
    n = colorgrad_n,
    colors = colors,
    space = space,
    lo = lo,
    lomid = lomid,
    mid = mid,
    midhi = midhi,
    hi = hi
  )

  # Cluster ----
  if (cluster) {
    Rowv <- Colv <- TRUE
  }

  # Cellnote ----
  if (!is.null(cellnote)) {
    if (cellnote == "values") cellnote <- matrix(ddSci(x), NROW(x), NCOL(x))
  }

  # heatmaply ----
  ggp2text <- ggplot2::element_text(
    family = theme[["font_family"]],
    color = theme[["tick_labels_col"]]
  )
  ggp2theme <- ggplot2::theme(
    panel.background = ggplot2::element_rect(fill = theme[["bg"]]),
    plot.background = ggplot2::element_rect(fill = theme[["bg"]]),
    legend.text = ggplot2::element_text(color = theme[["fg"]]),
    legend.background = ggplot2::element_rect(fill = theme[["bg"]]),
    text = ggp2text,
    title = ggp2text,
    axis.text = ggp2text,
    axis.text.x = ggp2text,
    axis.text.y = ggp2text,
    axis.title.x = ggp2text,
    axis.title.y = ggp2text,
    plot.subtitle = ggp2text,
    plot.caption = ggp2text
  )

  # Dendrogram ----
  if (isTRUE(Rowv)) {
    Rowv <- x |>
      dist() |>
      hclust() |>
      as.dendrogram() |>
      dendextend::set("branches_k_color", k = 1) |>
      dendextend::set("branches_lwd", 1) |>
      dendextend::set("branches_col", fg) |>
      dendextend::ladderize()
  }

  if (isTRUE(Colv)) {
    Colv <- x |>
      t() |>
      dist() |>
      hclust() |>
      as.dendrogram() |>
      dendextend::set("branches_k_color", k = 1) |>
      dendextend::set("branches_lwd", 1) |>
      dendextend::set("branches_col", fg) |>
      dendextend::ladderize()
  }

  plt <- suppressWarnings(heatmaply::heatmaply(
    x,
    Rowv = Rowv,
    Colv = Colv,
    symm = symm,
    cellnote = cellnote,
    colors = colors,
    grid_gap = grid_gap,
    limits = limits,
    margins = margins,
    key_title = key_title,
    xlab = xlab,
    ylab = ylab,
    # main = main,
    k_row = k_row,
    k_col = k_col,
    plot_method = plot_method,
    colorbar_len = colorbar_len,
    showticklabels = showticklabels,
    heatmap_layers = ggp2theme,
    row_side_colors = row_side_colors,
    row_side_palette = row_side_palette,
    col_side_colors = col_side_colors,
    col_side_palette = col_side_palette
    # side_color_layers = ggp2theme,
    # file = filename
  ))

  # Layout ----
  # '- layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = tick_labels_col
  )
  .legend <- list(
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = fg
    )
  )

  plt <- plotly::layout(
    plt,
    yaxis2 = list(
      title = list(
        font = f
      ), # gets assigned to dendrogram
      titlefont = f,
      tickcolor = bg,
      showline = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickfont = tickfont
    ),
    xaxis = list(
      title = list(
        font = f
      ),
      titlefont = f,
      tickcolor = bg,
      showline = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickfont = tickfont
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    legend = .legend
  )

  # Manual theme colors

  ## y axis tick label colors
  # plt[["x"]][["layoutAttrs"]][[2]][["yaxis2"]][["tickfont"]][["color"]]
  ## x axis tick label colors
  # plt[["x"]][["layoutAttrs"]][[2]][["xaxis"]][["tickfont"]][["color"]] <- "rgba(255, 0, 0, 1)"

  ## edge lines must be invisible
  plt[["x"]][["layout"]][["yaxis"]][["linecolor"]] <- plt[["x"]][["layout"]][[
    "xaxis2"
  ]][["linecolor"]] <- theme[["bg"]]

  # Manual layout ----
  # Set padding
  plt[["sizingPolicy"]][["padding"]] <- padding

  # Config ----
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  ## Override colorbar tick font color to theme[["fg"]]
  plt[["x"]][["data"]][[3]][["colorbar"]][["tickfont"]] <- list(
    family = theme[["font_family"]],
    color = tick_labels_col
  )
  plt[["x"]][["data"]][[3]][["colorbar"]][["tickcolor"]] <- theme[["tick_col"]]

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_heatmap


================================================
FILE: R/draw_leaflet.R
================================================
# draw_leaflet.R
# ::rtemis::
# 2020 EDG rtemis.org

#' Plot interactive choropleth map using \pkg{leaflet}
#'
#' @param fips Character vector: FIPS codes. (If numeric, it will be appropriately zero-padded).
#' @param values Values to map to `fips`.
#' @param names Character vector: Optional county names to appear on hover along `values`.
#' @param fillOpacity Float: Opacity for fill colors.
#' @param color_mapping Character: "Numeric" or "Bin".
#' @param col_lo Overlay color mapped to lowest value.
#' @param col_hi Overlay color mapped to highest value.
#' @param col_na Color mapped to NA values.
#' @param col_highlight Hover border color.
#' @param col_interpolate Character: "linear" or "spline".
#' @param col_bins Integer: Number of color bins to create if `color_mapping = "Bin"`.
#' @param domain Limits for mapping colors to values. Default = NULL and set to range.
#' @param weight Float: Weight of county border lines.
#' @param color Color of county border lines.
#' @param alpha Float: Overlay transparency.
#' @param bg_tile_provider Background tile (below overlay colors), one of `leaflet::providers`.
#' @param bg_tile_alpha Float: Background tile transparency.
#' @param fg_tile_provider Foreground tile (above overlay colors), one of `leaflet::providers`.
#' @param legend_position Character: One of: "topright", "bottomright", "bottomleft", "topleft".
#' @param legend_alpha Float: Legend box transparency.
#' @param legend_title Character: Defaults to name of `values` variable.
#' @param init_lng Float: Center map around this longitude (in decimal form). Default = -98.54180833333334 (US geographic center).
#' @param init_lat Float: Center map around this latitude (in decimal form). Default = 39.207413888888894 (US geographic center).
#' @param init_zoom Integer: Initial zoom level (depends on device, i.e. window, size).
#' @param stroke Logical: If TRUE, draw polygon borders.
#'
#' @return `leaflet` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' fips <- c(06075, 42101)
#' population <- c(874961, 1579000)
#' names <- c("SF", "Philly")
#' draw_leaflet(fips, population, names)
# NA in legend issue: https://github.com/rstudio/leaflet/issues/615
draw_leaflet <- function(
  fips,
  values,
  names = NULL,
  fillOpacity = 1,
  color_mapping = c("Numeric", "Bin"),
  col_lo = "#0290EE",
  col_hi = "#FE4AA3",
  col_na = "#303030",
  col_highlight = "#FE8A4F",
  col_interpolate = c("linear", "spline"),
  col_bins = 21, # for color_mapping Bin
  domain = NULL,
  weight = .5,
  color = "black",
  alpha = 1,
  bg_tile_provider = leaflet::providers[["CartoDB.Positron"]],
  bg_tile_alpha = .67,
  fg_tile_provider = leaflet::providers[["CartoDB.PositronOnlyLabels"]],
  legend_position = c(
    "topright",
    "bottomright",
    "bottomleft",
    "topleft"
  ),
  legend_alpha = .8,
  legend_title = NULL,
  init_lng = -98.54180833333334,
  init_lat = 39.207413888888894,
  init_zoom = 3,
  stroke = TRUE
) {
  # Dependencies ----
  check_dependencies("leaflet", "geojsonio", "htmltools", "htmlwidgets", "sf")

  # Arguments ----
  vals_name <- deparse(substitute(values))
  color_mapping <- match.arg(color_mapping)
  col_interpolate <- match.arg(col_interpolate)
  palette <- colorRamp(
    colors = c(col_lo, col_hi),
    interpolate = col_interpolate
  )
  legend_position <- match.arg(legend_position)
  if (is.null(legend_title)) {
    legend_title <- labelify(vals_name)
  }

  # State vs. County data ----
  if (max(nchar(fips)) < 3) {
    geo <- readRDS(
      system.file(
        "extdata",
        "us-states.rds",
        package = "rtemis"
      )
    )
    fips <- if (is.character(fips)) {
      fips
    } else {
      sprintf("%02d", fips)
    }
  } else {
    geo <- readRDS(
      system.file(
        "extdata",
        "us-counties.rds",
        package = "rtemis"
      )
    )
    fips <- if (is.character(fips)) {
      fips
    } else {
      sprintf("%05d", fips)
    }
  }

  # Match input county-level data
  index <- match(geo[["id"]], fips)
  geo[["val"]] <- values[index]

  # Colorscale ----
  if (color_mapping == "Numeric") {
    pal <- leaflet::colorNumeric(
      palette = palette,
      domain = domain,
      na.color = col_na,
      alpha = TRUE
    )
  } else {
    pal <- leaflet::colorBin(
      palette = palette,
      domain = domain,
      na.color = col_na,
      bins = col_bins
    )
  }

  # Hover labels ----
  .labs <- values[index]
  if (!is.null(names)) {
    .names <- names[index]
    labels <- lapply(seq_len(NROW(geo)), function(i) {
      if (is.na(.labs[i])) {
        '<div style="color:#7f7f7f;">N/A</div>'
      } else {
        sprintf("<strong>%s</strong><br/>%g", .names[i], .labs[i])
      }
    }) |>
      lapply(htmltools::HTML)
  } else {
    labels <- lapply(seq_len(NROW(geo)), function(i) {
      if (is.na(.labs[i])) {
        '<div style="color:#7f7f7f;">N/A</div>'
      } else {
        sprintf("%g", .labs[i])
      }
    }) |>
      lapply(htmltools::HTML)
  }
  geo[["labels"]] <- labels

  # leaflet map ----
  map <- leaflet::leaflet(geo) |>
    leaflet::addProviderTiles(
      provider = bg_tile_provider,
      options = leaflet::providerTileOptions(opacity = bg_tile_alpha)
    ) |>
    leaflet::addMapPane("polygons", zIndex = 410) |>
    leaflet::addMapPane("tiles", zIndex = 420) |>
    leaflet::addPolygons(
      fillColor = ~ pal(val),
      fillOpacity = fillOpacity,
      opacity = alpha,
      weight = weight,
      color = color,
      stroke = stroke,
      group = legend_title,
      options = leaflet::pathOptions(pane = "polygons"),
      highlight = leaflet::highlightOptions(
        weight = 2,
        color = col_highlight,
        bringToFront = TRUE
      ),
      label = labels,
      labelOptions = leaflet::labelOptions(
        style = list("font-weight" = "normal", padding = "2px 2px"),
        textsize = "15px",
        direction = "auto"
      )
    ) |>
    leaflet::addProviderTiles(
      provider = fg_tile_provider,
      options = leaflet::pathOptions(pane = "tiles")
    ) |>
    leaflet::addLegend(
      position = legend_position,
      pal = pal,
      values = geo[["val"]],
      opacity = legend_alpha,
      title = legend_title
    ) |>
    leaflet::addLayersControl(overlayGroups = c(legend_title)) |>
    leaflet::setView(lng = init_lng, lat = init_lat, zoom = init_zoom)

  insert <- htmltools::tags[["style"]](
    type = "text/css",
    "div.info.legend.leaflet-control br {clear: both;}"
  )
  map <- htmlwidgets::prependContent(map, insert)
  map
} # /rtemis:: draw_leaflet


================================================
FILE: R/draw_pie.R
================================================
# draw_pie.R
# ::rtemis::
# 2019 EDG rtemis.org

#' Interactive Pie Chart
#'
#' Draw interactive pie charts using `plotly`.
#'
#' @param x data.frame: Input: Either a) 1 numeric column with categories defined by rownames, or
#' b) two columns, the first is category names, the second numeric or c) a numeric vector with categories defined using
#' the `category.names` argument.
#' @param main Character: Plot title. Default = NULL, which results in `colnames(x)[1]`.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param alpha Numeric: Alpha for the pie slices.
#' @param bg Character: Background color.
#' @param plot_bg Character: Plot background color.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param category_names Character, vector, length = NROW(x): Category names. Default = NULL, which uses
#' either `rownames(x)`, or the first column of `x` if `ncol(x) = 2`.
#' @param textinfo Character: Info to show over each slice: "label", "percent", "label+percent".
#' @param font_size Integer: Font size for labels.
#' @param labs_col Character: Color of labels.
#' @param legend Logical: If TRUE, show legend.
#' @param legend_col Character: Color for legend.
#' @param sep_col Character: Separator color.
#' @param margin List: Margin settings.
#' @param padding Numeric: Padding between cells.
#' @param displayModeBar Logical: If TRUE, display the plotly mode bar.
#' @param modeBar_file_format Character: File format for image exports from the mode bar.
#' @param filename Character: File name to save plot.
#' @param file_width Integer: Width for saved file.
#' @param file_height Integer: Height for saved file.
#' @param file_scale Numeric: Scale for saved file.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' draw_pie(VADeaths[, 1, drop = FALSE])
draw_pie <- function(
  x,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = .8,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  category_names = NULL,
  textinfo = "label+percent",
  font_size = 16,
  labs_col = NULL,
  legend = TRUE,
  legend_col = NULL,
  sep_col = NULL,
  margin = list(b = 50, l = 50, t = 50, r = 20),
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Names ----
  .input_name <- deparse(substitute(x))
  .rownames <- rownames(x)
  .colnames <- colnames(x)
  x <- as.data.frame(x)

  .cat_names <- category_names

  if (NCOL(x) == 2) {
    .cat_names <- as.character(x[, 1])
    x <- x[, 2, drop = FALSE]
    if (is.null(main)) main <- .colnames[2]
  }

  if (is.null(.cat_names)) {
    if (!is.null(.rownames)) {
      .cat_names <- .rownames
    } else {
      .cat_names <- LETTERS[seq_len(NROW(x))]
    }
  }

  if (is.null(main)) {
    if (!is.null(.colnames)) {
      main <- labelify(.colnames[1])
    } else {
      main <- labelify(.input_name)
    }
  }

  if (!is.null(main)) {
    main <- paste0("<b>", main, "</b>")
  }

  # Colors ----
  p <- NROW(x)
  col <- recycle(palette, seq_len(p))

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  if (is.null(legend_col)) {
    legend_col <- labs_col
  }
  sep_col <- if (is.null(sep_col)) bg else plotly::toRGB(sep_col)

  # plotly ----
  plt <- plotly::plot_ly(
    labels = .cat_names,
    values = x[, 1],
    type = "pie",
    textinfo = textinfo,
    insidetextfont = list(color = "#FFFFFF"),
    outsidetextfont = list(color = labs_col),
    marker = list(
      colors = unlist(col),
      line = list(color = sep_col, width = 1)
    )
  )

  ## layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  .legend <- list(
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    )
  )
  plt <- plotly::layout(
    plt,
    yaxis = list(
      title = ylab,
      showline = FALSE,
      titlefont = f,
      showgrid = FALSE,
      zeroline = FALSE
    ),
    xaxis = list(
      title = xlab,
      showline = FALSE,
      titlefont = f,
      showgrid = FALSE,
      zeroline = FALSE
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      )
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    showlegend = legend,
    legend = .legend
  )

  # Padding
  plt[["sizingPolicy"]][["padding"]] <- padding
  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_pie.R


================================================
FILE: R/draw_protein.R
================================================
# draw_protein
# ::rtemis::
# 2022- EDG rtemis.org

#' Plot an amino acid sequence with annotations
#'
#' Plot an amino acid sequence with multiple site and/or region annotations.
#'
#' @param x Character vector: amino acid sequence (1-letter abbreviations) OR
#' `a3` object OR Character: path to JSON file OR Character: UniProt accession number.
#' @param site Named list of lists with indices of sites. These will be
#' highlighted by coloring the border of markers.
#' @param region Named list of lists with indices of regions. These will be
#' highlighted by coloring the markers and lines of regions using the
#' `palette` colors.
#' @param ptm List of post-translational modifications.
#' @param cleavage_site List of cleavage sites.
#' @param variant List of variant information.
#' @param disease_variants List of disease variant information.
#' @param n_per_row Integer: Number of amino acids to show per row.
#' @param main Character: Main title.
#' @param main_xy Numeric vector, length 2: x and y coordinates for title.
#' e.g. if `main_xref` and `main_yref` are `"paper"`:
#' `c(0.055, .975)` is top left, `c(.5, .975)` is top and
#' middle.
#' @param main_xref Character: xref for title.
#' @param main_yref Character: yref for title.
#' @param main_xanchor Character: xanchor for title.
#' @param main_yanchor Character: yanchor for title.
#' @param layout Character: "1curve", "grid": type of layout to use.
#' @param show_markers Logical: If TRUE, show amino acid markers.
#' @param show_labels Logical: If TRUE, annotate amino acids with elements.
#' @param font_size Integer: Font size for labels.
#' @param label_col Color for labels.
#' @param scatter_mode Character: Mode for scatter plot.
#' @param marker_size Integer: Size of markers.
#' @param marker_col Color for markers.
#' @param marker_alpha Numeric: Alpha for markers.
#' @param marker_symbol Character: Symbol for markers.
#' @param line_col Color for lines.
#' @param line_alpha Numeric: Alpha for lines.
#' @param line_width Numeric: Width for lines.
#' @param show_full_names Logical: If TRUE, show full names of amino acids.
#' @param region_scatter_mode Character: Mode for scatter plot.
#' @param region_style Integer: Style for regions.
#' @param region_marker_size Integer: Size of region markers.
#' @param region_marker_alpha Numeric: Alpha for region markers.
#' @param region_marker_symbol Character: Symbol for region markers.
#' @param region_line_dash Character: Dash for region lines.
#' @param region_line_shape Character: Shape for region lines.
#' @param region_line_smoothing Numeric: Smoothing for region lines.
#' @param region_line_width Numeric: Width for region lines.
#' @param region_line_alpha Numeric: Alpha for region lines.
#' @param theme `Theme` object.
#' @param region_palette Named list of colors for regions.
#' @param region_outline_only Logical: If TRUE, only show outline of regions.
#' @param region_outline_pad Numeric: Padding for region outline.
#' @param region_pad Numeric: Padding for region.
#' @param region_fill_alpha Numeric: Alpha for region fill.
#' @param region_fill_shape Character: Shape for region fill.
#' @param region_fill_smoothing Numeric: Smoothing for region fill.
#' @param bpadcx Numeric: Padding for region border.
#' @param bpadcy Numeric: Padding for region border.
#' @param site_marker_size Integer: Size of site markers.
#' @param site_marker_symbol Character: Symbol for site markers.
#' @param site_marker_alpha Numeric: Alpha for site markers.
#' @param site_border_width Numeric: Width for site borders.
#' @param site_palette Named list of colors for sites.
#' @param variant_col Color for variants.
#' @param disease_variant_col Color for disease variants.
#' @param showlegend_ptm Logical: If TRUE, show legend for PTMs.
#' @param ptm_col Named list of colors for PTMs.
#' @param ptm_symbol Character: Symbol for PTMs.
#' @param ptm_offset Numeric: Offset for PTMs.
#' @param ptm_pad Numeric: Padding for PTMs.
#' @param ptm_marker_size Integer: Size of PTM markers.
#' @param clv_col Color for cleavage site annotations.
#' @param clv_symbol Character: Symbol for cleavage site annotations.
#' @param clv_offset Numeric: Offset for cleavage site annotations.
#' @param clv_pad Numeric: Padding for cleavage site annotations.
#' @param clv_marker_size Integer: Size of cleavage site annotation markers.
#' @param annotate_position_every Integer: Annotate every nth position.
#' @param annotate_position_alpha Numeric: Alpha for position annotations.
#' @param annotate_position_ay Numeric: Y offset for position annotations.
#' @param position_font_size Integer: Font size for position annotations.
#' @param legend_xy Numeric vector, length 2: x and y coordinates for legend.
#' @param legend_xanchor Character: xanchor for legend.
#' @param legend_yanchor Character: yanchor for legend.
#' @param legend_orientation Character: Orientation for legend.
#' @param legend_col Color for legend.
#' @param legend_bg Color for legend background.
#' @param legend_border_col Color for legend border.
#' @param legend_borderwidth Numeric: Width for legend border.
#' @param legend_group_gap Numeric: Gap between legend groups.
#' @param margin List: Margin settings.
#' @param showgrid_x Logical: If TRUE, show x grid.
#' @param showgrid_y Logical: If TRUE, show y grid.
#' @param automargin_x Logical: If TRUE, use automatic margin for x axis.
#' @param automargin_y Logical: If TRUE, use automatic margin for y axis.
#' @param xaxis_autorange Logical: If TRUE, use automatic range for x axis.
#' @param yaxis_autorange Character: If TRUE, use automatic range for y axis.
#' @param scaleanchor_y Character: Scale anchor for y axis.
#' @param scaleratio_y Numeric: Scale ratio for y axis.
#' @param hoverlabel_align Character: Alignment for hover label.
#' @param displayModeBar Logical: If TRUE, display mode bar.
#' @param modeBar_file_format Character: File format for mode bar.
#' @param scrollZoom Logical: If TRUE, enable scroll zoom.
#' @param filename Character: File name to save plot.
#' @param file_width Integer: Width for saved file.
#' @param file_height Integer: Height for saved file.
#' @param file_scale Numeric: Scale for saved file.
#' @param width Integer: Width for plot.
#' @param height Integer: Height for plot.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' \dontrun{
#'   # Reads sequence from UniProt server
#'   tau <- seqinr::read.fasta("https://rest.uniprot.org/uniprotkb/P10636.fasta",
#'     seqtype = "AA"
#'   )
#'   draw_protein(as.character(tau[[1]]))
#'
#'   # or directly using the UniProt accession number:
#'   draw_protein("P10636")
#' }
draw_protein <- function(
  x,
  site = NULL,
  region = NULL,
  ptm = NULL,
  cleavage_site = NULL,
  variant = NULL,
  disease_variants = NULL,
  # label_group = NULL,
  n_per_row = NULL,
  main = NULL,
  main_xy = c(0.055, .975),
  main_xref = "paper",
  main_yref = "paper",
  main_xanchor = "middle",
  main_yanchor = "top",
  layout = c("simple", "grid", "1curve", "2curve"),
  show_markers = TRUE,
  show_labels = TRUE,
  font_size = 18,
  label_col = NULL,
  scatter_mode = "markers+lines",
  # AA marker
  marker_size = 28,
  marker_col = NULL, # "gray18",
  marker_alpha = 1,
  marker_symbol = "circle",
  # AA line
  line_col = NULL, # "gray18",
  line_alpha = 1,
  line_width = 2,
  # Hover names
  show_full_names = TRUE,
  # regions
  region_scatter_mode = "markers+lines",
  region_style = 3,
  region_marker_size = marker_size,
  region_marker_alpha = .6,
  region_marker_symbol = "circle",
  region_line_dash = "solid",
  region_line_shape = "line",
  region_line_smoothing = 1,
  region_line_width = 1,
  region_line_alpha = .6,
  theme = choose_theme(getOption("rtemis_theme")),
  region_palette = getOption("rtemis_palette", "rtms"),
  region_outline_only = FALSE,
  region_outline_pad = 2, # for fake polys
  region_pad = .35, # for real polys
  region_fill_alpha = .1666666,
  region_fill_shape = "line",
  region_fill_smoothing = 1,
  bpadcx = .5,
  bpadcy = .5,
  # Sites - colored marker border
  site_marker_size = marker_size,
  site_marker_symbol = marker_symbol,
  site_marker_alpha = 1,
  site_border_width = 1.5,
  site_palette = getOption("rtemis_palette", "rtms"),
  # Variants
  variant_col = "#FA6E1E",
  # Text groups
  disease_variant_col = "#E266AE", # "#c982d7"
  # PTMs
  showlegend_ptm = TRUE,
  ptm_col = NULL,
  ptm_symbol = "circle",
  ptm_offset = .12,
  ptm_pad = .35,
  ptm_marker_size = marker_size / 4.5,
  # Cleavage sites
  clv_col = NULL,
  clv_symbol = "triangle-down",
  clv_offset = .12,
  clv_pad = .35,
  clv_marker_size = marker_size / 4,
  # Position annotations
  annotate_position_every = 10,
  annotate_position_alpha = .5,
  annotate_position_ay = -.4 * marker_size,
  position_font_size = font_size - 6,
  # Legend
  legend_xy = c(.97, .954),
  legend_xanchor = "left",
  legend_yanchor = "top",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  margin = list(b = 0, l = 0, t = 0, r = 0, pad = 0),
  # Axes
  showgrid_x = FALSE,
  showgrid_y = FALSE,
  automargin_x = TRUE,
  automargin_y = TRUE,
  xaxis_autorange = TRUE,
  yaxis_autorange = "reversed",
  scaleanchor_y = "x",
  scaleratio_y = 1,
  # Layout
  hoverlabel_align = "left",
  # config
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  # file out
  filename = NULL,
  file_width = 1320,
  file_height = 990,
  file_scale = 1,
  width = NULL,
  height = NULL,
  verbosity = 1L
) {
  # Data ----
  if (inherits(x, "A3")) {
    dat <- x
    x <- dat[["sequence"]]
    site <- iflengthy(dat[["annotations"]][["site"]])
    region <- iflengthy(dat[["annotations"]][["region"]])
    ptm <- iflengthy(dat[["annotations"]][["ptm"]])
    cleavage_site <- iflengthy(dat[["annotations"]][["cleavage_site"]])
    variant <- iflengthy(dat[["annotations"]][["variant"]])
    disease_variants <- iflengthy(dat[["annotations"]][["site"]][[
      "disease_associated_variant"
    ]])
  }
  if (length(x) == 1) {
    if (grepl(".json$", x)) {
      dat <- jsonlite::read_json(
        x,
        simplifyVector = TRUE,
        simplifyMatrix = FALSE
      )
      x <- dat[["sequence"]]
      disease_variants <- dat[["annotations"]][["site"]][[
        "disease_associated_variant"
      ]]
      site <- dat[["annotations"]][["site"]]
      region <- dat[["annotations"]][["region"]]
      ptm <- dat[["annotations"]][["ptm"]]
      cleavage_site <- dat[["annotations"]][["cleavage_site"]]
    } else {
      dat <- uniprot_get(x, verbosity = verbosity)
      x <- dat[["sequence"]]
      # if (is.null(main)) main <- dat[["identifier"]]
    }
  }
  x <- toupper(x)
  position <- seq_along(x)
  n <- length(x)
  if (is.null(n_per_row)) {
    n_per_row <- ceiling(sqrt(n))
  }

  # Arguments ----
  layout <- match.arg(layout)

  # Coordinates ----
  if (layout == "grid") {
    # '- grid ----
    # 1:n_per_row, n_per_row:1, till n
    xs <- rep(c(1:n_per_row, n_per_row:1), length.out = n)
    nrows <- ceiling(n / n_per_row)
    ys <- rep(1:nrows, each = n_per_row, length = n)
  } else if (layout == "1curve") {
    # '- 1curve ----
    xs <- rep(c(1:n_per_row, (n_per_row - 1):2), length.out = n)
    nrows <- ceiling(1 + (n / n_per_row - 1))
    ys <- c(
      1,
      rep(seq(1, nrows * 4, 3), each = n_per_row - 1, length = n - 1)
    )
    # drop the n_per_row, then n_per_row - 1
    ys[seq(n_per_row, n, n_per_row - 1)] <-
      ys[seq(n_per_row, n, n_per_row - 1)] + 1.5
  } else if (layout == "simple") {
    # '- simple ----
    # if each point is 1 unit apart, border points must be sqrt(3)/2 away
    xs <- rep(c(1:n_per_row, (n_per_row - 1):2), length.out = n)
    nrows <- ceiling(1 + (n / n_per_row))
    ys <- c(
      1,
      rep(seq(1, nrows), each = n_per_row - 1, length = n - 1)
    )
    # every n_per_row, move to .5 up and sqrt(3)/2 right, left from previous
    # Right border
    ys[seq(n_per_row, n, (2 * n_per_row - 2))] <-
      ys[seq(n_per_row, n, (2 * n_per_row - 2))] + .5
    xs[seq(n_per_row, n, (2 * n_per_row - 2))] <-
      xs[seq(n_per_row, n, 2 * n_per_row - 2)] - 1 + sqrt(3) / 2
    # Left border
    ys[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] <-
      ys[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] + .5
    xs[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] <-
      xs[seq((2 * n_per_row) - 1, n, (2 * n_per_row - 2))] + 1 - sqrt(3) / 2
  } else if (layout == "2curve") {
    # '- 2curve ----
    xs <- rep(c(1:n_per_row, n_per_row:1), length.out = n)
    nrows <- ceiling(n / n_per_row)
    ys <- rep(1:nrows * 3 - 2, each = n_per_row, length = n)
    ys[seq(n_per_row, n, n_per_row)] <-
      ys[seq(n_per_row, n, n_per_row)] + 1
    ys[seq(n_per_row, n, n_per_row) + 1] <-
      ys[seq(n_per_row, n, n_per_row)] + 1
  }
  # Theme ----
  check_is_S7(theme, Theme)

  if (is.null(label_col)) {
    label_col <- theme[["fg"]]
  }
  label_col <- recycle(label_col, x)
  if (is.null(marker_col)) {
    marker_col <- color_fade(theme[["fg"]], theme[["bg"]], .9)
  }
  marker_col <- plotly::toRGB(marker_col, alpha = marker_alpha)
  if (is.null(line_col)) {
    line_col <- color_fade(theme[["fg"]], theme[["bg"]], .9)
  }
  line_col <- plotly::toRGB(line_col, alpha = marker_alpha)

  main_col <- plotly::toRGB(theme[["main_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  if (is.null(legend_col)) {
    legend_col <- labs_col
  }
  grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]])

  # Palette ----
  if (is.character(region_palette)) {
    region_palette <- get_palette(region_palette)
  }
  if (is.character(site_palette)) {
    site_palette <- get_palette(site_palette)
  }

  # Match abbreviations to full names ----
  if (show_full_names) {
    input <- switch(max(nchar(x)), "1" = "1", "3" = "3", "full")

    if (input == "full") {
      xnames <- x
    } else {
      if (input == "1") {
        xnames <- factor(
          x,
          levels = aa[["Abbreviation1"]],
          labels = aa[["Name"]]
        ) |>
          as.character()
      } else {
        xnames <- factor(
          x,
          levels = toupper(aa[["Abbreviation3"]]),
          labels = aa[["Name"]]
        ) |>
          as.character()
      }
    }
  } else {
    xnames <- x
  }

  # Variants: overwrite xnames with tooltip info
  if (!is.null(variant)) {
    for (i in seq_along(variant)) {
      varidi <- variant[[i]][["position"]]
      xnames[varidi] <- paste0(
        xnames[varidi],
        "\n\n",
        list2html(variant[[i]], col = variant_col)
      )
    }
  }

  # plotly ----
  plt <- plotly::plot_ly(
    width = width,
    height = height
  )

  # AA markers and lines ----
  aaname <- if (is.null(disease_variants)) {
    "1&#176; structure"
  } else {
    paste0(
      "1&#176; structure (",
      "<span style='color:",
      disease_variant_col,
      "'>Disease variants</span>)"
    )
  }
  if (show_markers) {
    clvtext <- if (!is.null(cleavage_site)) {
      # Get cleavage sites for each amino acid
      sapply(position, \(i) {
        if (i %in% unlist(cleavage_site)) {
          paste0(
            "\n<b><em>Cleavage site for:</em></b>\n",
            paste0(
              names(cleavage_site)[sapply(cleavage_site, \(x) i %in% x)],
              collapse = "\n"
            )
          )
        } else {
          ""
        }
      })
    } else {
      NULL
    }
    plt <- plt |>
      plotly::add_trace(
        x = xs,
        y = ys,
        type = "scatter",
        mode = scatter_mode,
        marker = list(
          color = plotly::toRGB(marker_col, alpha = marker_alpha),
          size = marker_size,
          symbol = marker_symbol
        ),
        line = list(
          color = plotly::toRGB(line_col, alpha = line_alpha),
          width = line_width
        ),
        text = paste0(position, ": ", xnames, clvtext),
        name = aaname,
        # hoverinfo = marker.hoverinfo
        hoverinfo = "text"
      )
  }
  # regions ----
  if (!is.null(region)) {
    region_names <- names(region)
    if (is.null(region_names)) {
      region_names <- paste("region", seq_along(region))
    }

    if (region_style == 1) {
      # '- region style 1 ----
      # for overlapping sets within each region
      for (i in seq_along(region)) {
        for (j in seq_along(region[[i]])) {
          plt <- plt |>
            plotly::add_trace(
              x = xs[region[[i]][[j]]],
              y = ys[region[[i]][[j]]],
              type = "scatter",
              mode = region_scatter_mode,
              marker = list(
                color = plotly::toRGB(
                  region_palette[[i]],
                  alpha = region_marker_alpha
                ),
                size = region_marker_size,
                symbol = region_marker_symbol
              ),
              line = list(
                color = plotly::toRGB(
                  region_palette[[i]],
                  alpha = region_line_alpha
                ),
                dash = region_line_dash,
                shape = region_line_shape,
                smoothing = region_line_smoothing,
                width = region_line_width
              ),
              name = region_names[i],
              legendgroup = region_names[i],
              showlegend = j == 1
            )
          if (region_outline_only) {
            # simulate rounded selection around AAs
            # need region_marker_size & line_width > marker_size
            plt <- plt |>
              plotly::add_trace(
                x = xs[region[[i]][[j]]],
                y = ys[region[[i]][[j]]],
                type = "scatter",
                mode = region_scatter_mode,
                marker = list(
                  color = plotly::toRGB(
                    # marker_col,
                    theme[["bg"]],
                    alpha = marker_alpha
                  ),
                  size = region_marker_size - region_outline_pad,
                  symbol = region_marker_symbol
                ),
                line = list(
                  color = plotly::toRGB(
                    # line_col,
                    theme[["bg"]],
                    alpha = line_alpha
                  ),
                  shape = region_line_shape,
                  smoothing = region_line_smoothing,
                  width = region_line_width - region_outline_pad
                ),
                name = NULL,
                legendgroup = region_names[i],
                showlegend = FALSE
              )
            plt <- plt |>
              plotly::add_trace(
                x = xs[region[[i]][[j]]],
                y = ys[region[[i]][[j]]],
                type = "scatter",
                mode = scatter_mode,
                marker = list(
                  color = plotly::toRGB(marker_col, alpha = marker_alpha),
                  size = marker_size,
                  symbol = marker_symbol
                ),
                line = list(
                  color = plotly::toRGB(line_col, alpha = line_alpha),
                  width = line_width
                ),
                name = NULL,
                legendgroup = region_names[i],
                showlegend = FALSE
              )
          }
        }
      }
    } else if (region_style == 2) {
      # '- region style 2 ----
      # for non-overlapping sets within each region
      for (i in seq_along(region)) {
        plt <- plt |>
          plotly::add_trace(
            x = xs[unlist(region[[i]])],
            y = ys[unlist(region[[i]])],
            type = "scatter",
            mode = "markers",
            marker = list(
              color = plotly::toRGB(
                region_palette[[i]],
                alpha = region_marker_alpha
              ),
              size = region_marker_size,
              symbol = region_marker_symbol
            ),
            name = region_names[i]
          )
      }
    } else {
      # '- region style 3 ----
      # for 1curve only
      # region polys: get marker direction and location:
      # left, leftborder, right, rightborder
      dl <- c(
        "r",
        rep(c("r", "l"), each = n_per_row - 1, length = n - 1)
      )
      dl[seq(n_per_row, n, n_per_row - 1)] <-
        paste0(dl[seq(n_per_row, n, n_per_row - 1)], "b")
      # i: IDI of region group
      for (i in seq_along(region)) {
        # each region's directions
        region_dl <- lapply(seq_along(region[[i]]), \(j) {
          dl[region[[i]][[j]]]
        })

        region_poly_xy <- lapply(seq_along(region[[i]]), \(j) {
          poly_xys(
            xs = xs[region[[i]][[j]]],
            ys = ys[region[[i]][[j]]],
            d = region_dl[[j]],
            pad = region_pad,
            bpadcx = bpadcx,
            bpadcy = bpadcy
          )
        })

        for (j in seq_along(region[[i]])) {
          plt <- plt |>
            plotly::add_polygons(
              x = region_poly_xy[[j]][["px"]],
              y = region_poly_xy[[j]][["py"]],
              line = list(
                color = region_palette[[i]],
                width = region_line_width,
                shape = region_fill_shape,
                smoothing = region_fill_smoothing
              ),
              fillcolor = plotly::toRGB(
                region_palette[[i]],
                alpha = region_fill_alpha
              ),
              name = region_names[i],
              legendgroup = region_names[i],
              showlegend = j == 1
            )
        }
      } # each region's individual regions' coords
    }
  } # /regions

  # Sites ----
  if (!is.null(site)) {
    site_names <- names(site)
    if (is.null(site_names)) {
      site_names <- paste("Site", seq_along(site))
    }
    # for overlapping sets within each region
    for (i in seq_along(site)) {
      for (j in seq_along(site[[i]])) {
        plt <- plt |>
          plotly::add_trace(
            x = xs[site[[i]][[j]]],
            y = ys[site[[i]][[j]]],
            type = "scatter",
            mode = "markers",
            marker = list(
              color = plotly::toRGB(
                "#000000",
                alpha = 0
              ),
              size = site_marker_size,
              symbol = site_marker_symbol,
              line = list(
                color = plotly::toRGB(
                  site_palette[[i]],
                  alpha = site_marker_alpha
                ),
                width = site_border_width
              )
            ),
            name = site_names[i],
            legendgroup = site_names[i],
            showlegend = j == 1
          )
      }
    }
  } # /sites

  # PTMs ----
  # Note: Do not show both PTMs and cleavage sites using the same padding
  if (!is.null(ptm)) {
    if (verbosity > 1L) {
      msg_info("Adding PTM markers...")
    }
    if (is.null(ptm_col)) {
      ptm_col <- 1 + seq_along(ptm)
    }
    ptm_symbol <- recycle(ptm_symbol, ptm)
    ptm_names <- names(ptm)
    for (i in seq_along(ptm)) {
      polyoffset <- npad(i, n = length(ptm), pad = ptm_pad)
      plt <- plt |>
        plotly::add_trace(
          x = xs[ptm[[i]]] + polyoffset[1],
          y = ys[ptm[[i]]] + polyoffset[2],
          type = "scatter",
          mode = "markers",
          marker = list(
            color = plotly::toRGB(ptm_col[[i]]),
            size = ptm_marker_size,
            symbol = ptm_symbol[i]
          ),
          name = ptm_names[i],
          showlegend = showlegend_ptm
        )
    }
  }
  # Cleavage sites ----
  # Note: Do not show both PTMs and cleavage sites using the same padding
  if (!is.null(cleavage_site)) {
    if (verbosity > 1L) {
      msg_info("Adding cleavage site markers...")
    }
    if (is.null(clv_col)) {
      clv_col <- c(
        colorspace::qualitative_hcl(
          (length(cleavage_site)),
          h = c(40, 360),
          c = 120,
          l = 50
        )
      )
    }
    clv_symbol <- recycle(clv_symbol, cleavage_site)
    clv_names <- names(cleavage_site)
    for (i in seq_along(cleavage_site)) {
      polyoffset <- npad(i, n = length(cleavage_site), pad = clv_pad)
      plt <- plt |>
        plotly::add_trace(
          x = xs[cleavage_site[[i]]] + polyoffset[1],
          y = ys[cleavage_site[[i]]] + polyoffset[2],
          type = "scatter",
          mode = "markers",
          marker = list(
            color = plotly::toRGB(clv_col[[i]]),
            size = clv_marker_size,
            symbol = clv_symbol[i]
          ),
          name = clv_names[i],
          showlegend = showlegend_ptm
        )
    }
  }

  # AA labels ----
  if (show_labels) {
    # Variants
    if (!is.null(variant)) {
      variant_idi <- sapply(variant, \(v) v[["position"]])
      label_col[variant_idi] <- variant_col
    }
    # Disease variants
    if (!is.null(disease_variants)) {
      label_col[disease_variants] <- disease_variant_col
    }
    label_group <- factor(label_col)
    label_group_col <- levels(label_group)
    for (i in seq_along(label_group_col)) {
      idx <- label_group == label_group_col[i]
      plt <- plt |>
        plotly::add_annotations(
          xref = "x",
          yref = "y",
          x = xs[idx],
          y = ys[idx],
          text = x[idx],
          font = list(
            family = theme[["font_family"]],
            size = font_size,
            color = label_group_col[[i]]
          ),
          showarrow = FALSE
          # name = label_group.levels[[i]],
          # showlegend = nchar(label_group.levels[[i]]) > 0
        )
    }
    # }
  }

  # Position annotations ----
  if (
    !is.null(annotate_position_every) && length(x) > annotate_position_every
  ) {
    idxpos <- seq(annotate_position_every, n, annotate_position_every)
    plt <- plt |>
      plotly::add_annotations(
        x = xs[idxpos],
        y = ys[idxpos],
        xref = "x",
        yref = "y",
        xanchor = "middle",
        yanchor = "bottom",
        ax = 0,
        ay = annotate_position_ay,
        text = idxpos,
        showarrow = TRUE,
        arrowcolor = "#ffffff00",
        font = list(
          size = position_font_size,
          family = theme[["font_family"]],
          color = plotly::toRGB(theme[["fg"]], alpha = annotate_position_alpha)
        )
      )
  }

  # Layout ----
  .legend <- list(
    x = legend_xy[1],
    xanchor = legend_xanchor,
    y = legend_xy[2],
    yanchor = legend_yanchor,
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    orientation = legend_orientation,
    bgcolor = plotly::toRGB(legend_bg),
    bordercolor = plotly::toRGB(legend_border_col),
    borderwidth = legend_borderwidth,
    tracegroupgap = legend_group_gap
  )

  plt <- plotly::layout(
    plt,
    xaxis = list(
      autorange = xaxis_autorange,
      showgrid = showgrid_x,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      zeroline = FALSE,
      showticklabels = FALSE,
      automargin = automargin_x
    ),
    yaxis = list(
      autorange = yaxis_autorange,
      showgrid = showgrid_y,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      zeroline = FALSE,
      showticklabels = FALSE,
      automargin = automargin_y,
      scaleanchor = scaleanchor_y,
      scaleratio = scaleratio_y
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = main_xref,
      yref = main_yref,
      xanchor = main_xanchor,
      yanchor = main_yanchor,
      x = main_xy[1],
      y = main_xy[2]
    ),
    paper_bgcolor = theme[["bg"]],
    plot_bgcolor = theme[["plot_bg"]],
    margin = margin,
    legend = .legend,
    hoverlabel = list(
      align = hoverlabel_align
    )
  )

  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    ),
    scrollZoom = TRUE
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  return(plt)
} # /rtemis::draw_protein

aa <- data.frame(
  Abbreviation1 = c(
    "A",
    "R",
    "N",
    "D",
    "C",
    "Q",
    "E",
    "G",
    "H",
    "I",
    "L",
    "K",
    "M",
    "F",
    "P",
    "S",
    "T",
    "W",
    "Y",
    "V",
    "B",
    "Z",
    "X",
    ""
  ),
  Abbreviation3 = c(
    "Ala",
    "Arg",
    "Asn",
    "Asp",
    "Cys",
    "Gln",
    "Glu",
    "Gly",
    "His",
    "Ile",
    "Leu",
    "Lys",
    "Met",
    "Phe",
    "Pro",
    "Ser",
    "Thr",
    "Trp",
    "Tyr",
    "Val",
    "Asx",
    "Glx",
    "Xaa",
    "TERM"
  ),
  Name = c(
    "Alanine",
    "Arginine",
    "Asparagine",
    "Aspartate",
    "Cysteine",
    "Glutamine",
    "Glutamate",
    "Glycine",
    "Histidine",
    "Isoleucine",
    "Leucine",
    "Lysine",
    "Methionine",
    "Phenylalanine",
    "Proline",
    "Serine",
    "Threonine",
    "Tryptophan",
    "Tyrosine",
    "Valine",
    "Aspartic acid or Asparagine",
    "Glutamine or Glutamic acid",
    "(Any)",
    "Termination codon"
  )
)

poly_xys <- function(xs, ys, d, pad = 1, bpadcx = .5, bpadcy = .5) {
  n <- length(xs)
  dk <- rep(1, n)
  kinks <- which("rb" == d | "lb" == d)
  for (i in kinks) {
    if ((i + 1) <= n) {
      dk[(i + 1):n] <- -dk[(i + 1):n]
    }
  }

  # première ----
  px_1 <- switch(
    d[1],
    "r" = xs[1] - pad,
    "l" = xs[1] + pad,
    "rb" = c(xs[1] - pad, xs[1]),
    "lb" = c(xs[1] + pad, xs[1])
  )
  py_1 <- switch(
    d[1],
    "rb" = rep(ys[1] - pad, 2),
    "lb" = rep(ys[1] - pad, 2),
    ys[1] - pad
  )

  # aller ----
  # k: IDI of individual amino acid within individual region
  px_aller <-
    sapply(seq_along(d), \(k) {
      if (d[k] == "rb") {
        # rep(xs[k] + sqrt(.5 * pad^2), 2)
        rep(xs[k] + pad, 2)
      } else if (d[k] == "lb") {
        # rep(xs[k] - sqrt(.5 * pad^2), 2)
        rep(xs[k] - pad, 2)
      } else {
        xs[k]
      }
    }) |>
    unlist()

  py_aller <-
    sapply(seq_along(d), \(k) {
      if (d[[k]] %in% c("l", "r")) {
        if (dk[k] == -1) {
          ys[k] + pad
        } else {
          ys[k] - pad
        }
      } else {
        if (k == 1) {
          c(ys[k] - pad, ys[k] + sqrt(.5 * pad^2))
        } else if (k == length(d)) {
          c(ys[k] - sqrt(.5 * pad^2), ys[k] + pad)
        } else {
          c(ys[k] - sqrt(.5 * pad^2), ys[k] + sqrt(.5 * pad^2))
        }
      }
    }) |>
    unlist()

  # centre ----
  dr <- rev(d)
  dkr <- rev(dk)
  xsr <- rev(xs)
  ysr <- rev(ys)
  px_centre <-
    switch(
      dr[1],
      "r" = rep(xsr[1] + pad, 2),
      "l" = rep(xsr[1] - pad, 2),
      # "rb" = c(xsr[1], xsr[1] - sqrt(.5 * pad^2)),
      # "lb" = c(xsr[1], xsr[1] + sqrt(.5 * pad^2))
      "rb" = c(xsr[1], xsr[1] - pad),
      "lb" = c(xsr[1], xsr[1] + pad)
    )

  py_centre <-
    if (dr[1] %in% c("r", "l")) {
      if (length(kinks) > 0) {
        c(ysr[1] + pad, ysr[1] - pad)
      } else {
        c(ysr[1] - pad, ysr[1] + pad)
      }
    } else {
      rep(ysr[1] + pad, 2)
    }

  # retour ----
  px_retour <-
    sapply(seq_along(dr), \(k) {
      if (dr[k] == "rb") {
        if (k == 1 | k == length(dr)) {
          rep(xsr[k] - pad, 2)
        } else {
          rep(xsr[k] - 1.5 * sqrt(.5 * pad^2), 2)
        }
      } else if (dr[k] == "lb") {
        if (k == 1 | k == length(dr)) {
          rep(xsr[k] + pad, 2)
        } else {
          rep(xsr[k] + 1.5 * sqrt(.5 * pad^2), 2)
        }
      } else {
        xsr[k]
      }
    }) |>
    unlist()

  py_retour <-
    sapply(seq_along(dr), \(k) {
      if (dr[[k]] %in% c("l", "r")) {
        if (dkr[k] == -1) {
          ysr[k] - pad
        } else {
          ysr[k] + pad
        }
      } else {
        rep(ysr[k], 2)
      }
    }) |>
    unlist()

  # find point before and after rb/lb
  idirb <- which(d == "rb")
  if (length(idirb) > 0) {
    if (idirb > 1) {
      px_aller[idirb - 1] <- px_aller[idirb - 1] + sqrt(.5 * pad^2)
    }
    if ((idirb + 1) <= length(d)) {
      px_aller[idirb + 2] <- px_aller[idirb + 2] + sqrt(.5 * pad^2)
    }
  }

  idilb <- which(d == "lb")
  if (length(idilb) > 0) {
    if (idilb > 1) {
      px_aller[idilb - 1] <- px_aller[idilb - 1] - sqrt(.5 * pad^2)
    }
    if ((idilb + 1) <= length(d)) {
      px_aller[idilb + 2] <- px_aller[idilb + 2] - sqrt(.5 * pad^2)
    }
  }

  # pénultième ----
  py_pen <- if (d[1] %in% c("rb", "lb")) {
    ys[1] - sqrt(.5 * pad^2)
  } else {
    ys[1] + pad
  }

  # out ----
  list(
    px = c(px_1, px_aller, px_centre, px_retour, px_1[1], px_1[1]),
    py = c(py_1, py_aller, py_centre, py_retour, py_pen, py_1[1])
  )
}


qrtpad <- function(i, pad = .3) {
  qrt <- sqrt(.5 * pad^2)
  switch(
    i,
    `1` = c(qrt, -qrt),
    `2` = c(pad, 0),
    `3` = c(qrt, qrt),
    `4` = c(0, pad),
    `5` = c(-qrt, qrt),
    `6` = c(-pad, 0),
    `7` = c(-qrt, -qrt)
  )
}

# npad: function to calculate circular offset of a point from the center of a region
# by dividing circle into n equal parts, beginning from the top
npad <- function(i, n = 12, pad = .3) {
  angle <- 2 * pi / n
  x <- sin(angle * i) * pad
  y <- cos(angle * i) * pad
  c(x, y)
}


================================================
FILE: R/draw_pvals.R
================================================
# draw_pvals.R
# ::rtemis::
# 2021 EDG rtemis.org

#' Barplot p-values using [draw_bar]
#'
#' Plot 1 - p-values as a barplot
#'
#' @param x Float, vector: p-values.
#' @param xnames Character, vector: feature names.
#' @param yname Character: outcome name.
#' @param p_adjust_method Character: method for [p.adjust].
#' @param pval_hline Float: Significance level at which to plot horizontal line.
#' @param hline_col Color for `pval_hline`.
#' @param hline_dash Character: type of line to draw.
#' @param ... Additional arguments passed to [draw_bar].
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' draw_pvals(c(0.01, 0.02, 0.03), xnames = c("Feature1", "Feature2", "Feature3"))
draw_pvals <- function(
  x,
  xnames = NULL,
  yname = NULL,
  p_adjust_method = "none",
  pval_hline = .05,
  hline_col = rt_red,
  hline_dash = "dash",
  ...
) {
  if (is.null(xnames)) {
    xnames <- names(x)
  }
  if (is.null(yname)) {
    yname <- deparse(substitute(x))
  }

  draw_bar(
    1 - p.adjust(x, method = p_adjust_method),
    group_names = xnames,
    legend = FALSE,
    ylab = if (p_adjust_method == "none") {
      "1 - p-value"
    } else {
      paste0("1 - ", p_adjust_method, "-adjusted p-value")
    },
    hline = 1 - pval_hline,
    hline_col = hline_col,
    hline_dash = hline_dash,
    ...
  )
} # /rtemis::draw_pvals


================================================
FILE: R/draw_roc.R
================================================
# draw_roc.R
# ::rtemis::
# 2025 EDG rtemis.org

#' Draw ROC curve
#'
#' @param true_labels Factor: True outcome labels.
#' @param predicted_prob Numeric vector \[0, 1\]: Predicted probabilities for the positive class (i.e. second level of outcome).
#' Or, for multiclass, a matrix of predicted probabilities with one column per class.
#' Or, a list of such vectors/matrices to draw multiple ROC curves on the same plot.
#' @param multiclass_fill_labels Logical: If TRUE, fill in labels for multiclass ROC curves.
#' If FALSE, column names of `predicted_prob` must match levels of `true_labels`.
#' @param main Character: Main title for the plot.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param legend Logical: If TRUE, draw legend.
#' @param legend_title Character: Title for the legend.
#' @param legend_xy Numeric vector: Position of the legend in the form c(x, y).
#' @param legend_xanchor Character: X anchor for the legend.
#' @param legend_yanchor Character: Y anchor for the legend.
#' @param auc_dp Integer: Number of decimal places for AUC values.
#' @param xlim Numeric vector: Limits for the x-axis.
#' @param ylim Numeric vector: Limits for the y-axis.
#' @param diagonal Logical: If TRUE, draw diagonal line.
#' @param diagonal_col Character: Color for the diagonal line.
#' @param axes_square Logical: If TRUE, make axes square.
#' @param filename Character: If provided, save the plot to this file.
#' @param ... Additional arguments passed to [draw_scatter].
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Binary classification
#' true_labels <- factor(c("A", "B", "A", "A", "B", "A", "B", "B", "A", "B"))
#' predicted_prob <- c(0.1, 0.4, 0.35, 0.8, 0.65, 0.2, 0.9, 0.55, 0.3, 0.7)
#' draw_roc(true_labels, predicted_prob)
draw_roc <- function(
  true_labels,
  predicted_prob,
  multiclass_fill_labels = TRUE,
  main = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  legend = TRUE,
  legend_title = "Group (AUC)",
  legend_xy = c(1, 0),
  legend_xanchor = "right",
  legend_yanchor = "bottom",
  auc_dp = 3L,
  xlim = c(-0.05, 1.05),
  ylim = c(-0.05, 1.05),
  diagonal = TRUE,
  diagonal_col = NULL,
  axes_square = TRUE,
  filename = NULL,
  ...
) {
  # List of probabilities
  probl <- if (!is.list(predicted_prob)) {
    list(predicted_prob)
  } else {
    predicted_prob
  }
  labelsl <- if (!is.list(true_labels)) {
    list(true_labels)
  } else {
    true_labels
  }
  # Check N sets
  if (length(probl) != length(labelsl)) {
    cli::cli_abort(
      "You must have the same N of sets of `predicted_prob` and `true_labels`."
    )
  }

  # Binary vs. Multiclass
  # Determine number of classes from number of columns in predicted_prob
  # If ncol is NULL, it is binary classification
  n_classes <- unique(sapply(probl, \(x) {
    if (is.null(ncol(x))) {
      2L
    } else {
      ncol(x)
    }
  }))

  if (length(n_classes) > 1) {
    cli::cli_abort(
      "You must have the same number of classes in each set of `predicted_prob`."
    )
  }

  # Check lengths of corresponding sets
  # NROW() works for both vectors and matrices
  for (i in seq_along(probl)) {
    if (NROW(probl[[i]]) != length(labelsl[[i]])) {
      cli::cli_abort(
        "You must have the same N of `predicted_prob` and `true_labels`."
      )
    }
  }

  if (n_classes == 2L) {
    .roc <- lapply(seq_along(probl), \(i) {
      pROC::roc(
        response = labelsl[[i]],
        predictor = probl[[i]],
        levels = levels(labelsl[[i]]),
        direction = "<"
      )
    })
  } else {
    .roc <- lapply(seq_along(probl), \(i) {
      pred <- probl[[i]]
      if (is.null(colnames(pred))) {
        if (multiclass_fill_labels) {
          colnames(pred) <- levels(labelsl[[i]])
        } else {
          cli::cli_abort(
            "For multiclass, `predicted_prob` must have column names matching levels of `true_labels`."
          )
        }
      }
      pROC::multiclass.roc(
        response = labelsl[[i]],
        predictor = pred,
        levels = levels(labelsl[[i]])
      )
    })
  }

  .names <- names(probl)

  if (n_classes == 2L) {
    TPR <- lapply(.roc, \(r) r[["sensitivities"]])
    FPR <- lapply(.roc, \(r) 1 - r[["specificities"]])
    AUC <- lapply(.roc, \(r) r[["auc"]])
  } else {
    TPR <- lapply(.roc, \(r) r[["rocs"]][[1]][["sensitivities"]])
    FPR <- lapply(.roc, \(r) 1 - r[["rocs"]][[1]][["specificities"]])
    AUC <- lapply(.roc, \(r) r[["auc"]])
  }
  names(TPR) <- names(FPR) <- names(AUC) <- .names
  theme@config[["zerolines"]] <- FALSE
  draw_scatter(
    x = FPR,
    y = TPR,
    xlab = "False Positive Rate",
    ylab = "True Positive Rate",
    main = main,
    theme = theme,
    palette = palette,
    mode = "lines",
    group_names = paste0(.names, " (", ddSci(unlist(AUC), auc_dp), ")"),
    legend = legend,
    legend_title = legend_title,
    legend_xy = legend_xy,
    legend_xanchor = legend_xanchor,
    legend_yanchor = legend_yanchor,
    xlim = xlim,
    ylim = ylim,
    diagonal = diagonal,
    diagonal_col = diagonal_col,
    axes_square = axes_square,
    order_on_x = FALSE,
    filename = filename,
    ...
  )
} # /rtemis::draw_roc


================================================
FILE: R/draw_scatter.R
================================================
# draw_scatter.R
# ::rtemis::
# 2019- EDG rtemis.org

#' Interactive Scatter Plots
#'
#' Draw interactive scatter plots using `plotly`.
#'
#' @param x Numeric, vector/data.frame/list: x-axis data. If y is NULL and `NCOL(x) > 1`, first two columns used as `x` and `y`, respectively.
#' @param y Numeric, vector/data.frame/list: y-axis data.
#' @param fit Character: Fit method.
#' @param se_fit Logical: If TRUE, include standard error of the fit.
#' @param se_times Numeric: Multiplier for standard error.
#' @param include_fit_name Logical: If TRUE, include fit name in legend.
#' @param cluster Character: Clustering method.
#' @param cluster_config List: Config for clustering.
#' @param group Factor: Grouping variable.
# @param formula Formula: Formula for non-linear least squares fit.
#' @param rsq Logical: If TRUE, print R-squared values in legend if `fit` is set.
#' @param mode Character, vector: "markers", "lines", "markers+lines".
#' @param order_on_x Logical: If TRUE, order `x` and `y` on `x`.
#' @param main Character: Main title.
#' @param subtitle Character: Subtitle.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param alpha Numeric: Alpha for markers.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to use.
#' @param axes_square Logical: If TRUE, draw a square plot.
#' @param group_names Character: Names for groups.
#' @param font_size Numeric: Font size.
#' @param marker_col Color for markers.
#' @param marker_size Numeric: Marker size.
#' @param symbol Character: Marker symbol.
#' @param fit_col Color for fit line.
#' @param fit_alpha Numeric: Alpha for fit line.
#' @param fit_lwd Numeric: Line width for fit line.
#' @param line_shape Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv".
#' @param se_col Color for standard error band.
#' @param se_alpha Numeric: Alpha for standard error band.
#' @param scatter_type Character: Scatter plot type.
#' @param show_marginal_x Logical: If TRUE, add marginal distribution line markers on x-axis.
#' @param show_marginal_y Logical: If TRUE, add marginal distribution line markers on y-axis.
#' @param marginal_x Numeric: Data for marginal distribution on x-axis.
#' @param marginal_y Numeric: Data for marginal distribution on y-axis.
#' @param marginal_x_y Numeric: Y position of marginal markers on x-axis.
#' @param marginal_y_x Numeric: X position of marginal markers on y-axis.
#' @param marginal_col Color for marginal markers.
#' @param marginal_alpha Numeric: Alpha for marginal markers.
#' @param marginal_size Numeric: Size of marginal markers.
#' @param legend Logical: If TRUE, draw legend.
#' @param legend_title Character: Title for legend.
#' @param legend_trace Logical: If TRUE, draw legend trace. (For when you have `fit` and don't want a trace for the markers.)
#' @param legend_xy Numeric: Position of legend.
#' @param legend_xanchor Character: X anchor for legend.
#' @param legend_yanchor Character: Y anchor for legend.
#' @param legend_orientation Character: Orientation of legend.
#' @param legend_col Color for legend text.
#' @param legend_bg Color for legend background.
#' @param legend_border_col Color for legend border.
#' @param legend_borderwidth Numeric: Border width for legend.
#' @param legend_group_gap Numeric: Gap between legend groups.
#' @param x_showspikes Logical: If TRUE, show spikes on x-axis.
#' @param y_showspikes Logical: If TRUE, show spikes on y-axis.
#' @param spikedash Character: Dash type for spikes.
#' @param spikemode Character: Spike mode.
#' @param spikesnap Character: Spike snap mode.
#' @param spikecolor Color for spikes.
#' @param spikethickness Numeric: Thickness of spikes.
#' @param margin List: Plot margins.
#' @param main_y Numeric: Y position of main title.
#' @param main_yanchor Character: Y anchor for main title.
#' @param subtitle_x Numeric: X position of subtitle.
#' @param subtitle_y Numeric: Y position of subtitle.
#' @param subtitle_xref Character: X reference for subtitle.
#' @param subtitle_yref Character: Y reference for subtitle.
#' @param subtitle_xanchor Character: X anchor for subtitle.
#' @param subtitle_yanchor Character: Y anchor for subtitle.
#' @param automargin_x Logical: If TRUE, automatically adjust x-axis margins.
#' @param automargin_y Logical: If TRUE, automatically adjust y-axis margins.
#' @param xlim Numeric: Limits for x-axis.
#' @param ylim Numeric: Limits for y-axis.
#' @param axes_equal Logical: If TRUE, set equal scaling for axes.
#' @param diagonal Logical: If TRUE, add diagonal line.
#' @param diagonal_col Color for diagonal line.
#' @param diagonal_dash Character: "solid", "dash", "dot", "dashdot", "longdash", "longdashdot". Dash type for diagonal line.
#' @param diagonal_alpha Numeric: Alpha for diagonal line.
#' @param fit_params `Hyperparameters` for fit.
#' @param vline Numeric: X position for vertical line.
#' @param vline_col Color for vertical line.
#' @param vline_width Numeric: Width for vertical line.
#' @param vline_dash Character: Dash type for vertical line.
#' @param hline Numeric: Y position for horizontal line.
#' @param hline_col Color for horizontal line.
#' @param hline_width Numeric: Width for horizontal line.
#' @param hline_dash Character: Dash type for horizontal line.
#' @param hovertext List: Hover text for markers.
#' @param width Numeric: Width of plot.
#' @param height Numeric: Height of plot.
#' @param displayModeBar Logical: If TRUE, display mode bar.
#' @param modeBar_file_format Character: File format for mode bar.
#' @param scrollZoom Logical: If TRUE, enable scroll zoom.
#' @param filename Character: Filename to save plot.
#' @param file_width Numeric: Width of saved file.
#' @param file_height Numeric: Height of saved file.
#' @param file_scale Numeric: Scale of saved file.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' draw_scatter(iris$Sepal.Length, iris$Petal.Length,
#'   fit = "gam", se_fit = TRUE, group = iris$Species
#' )
draw_scatter <- function(
  x,
  y = NULL,
  fit = NULL,
  se_fit = FALSE,
  se_times = 1.96,
  include_fit_name = TRUE,
  cluster = NULL,
  cluster_config = list(k = 2),
  group = NULL,
  # formula = NULL,
  rsq = TRUE,
  mode = "markers",
  order_on_x = NULL,
  main = NULL,
  subtitle = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  marker_col = NULL,
  marker_size = 8,
  symbol = "circle",
  fit_col = NULL,
  fit_alpha = .8,
  fit_lwd = 2.5,
  line_shape = "linear",
  se_col = NULL,
  se_alpha = .4,
  scatter_type = "scatter",
  show_marginal_x = FALSE,
  show_marginal_y = FALSE,
  marginal_x = x,
  marginal_y = y,
  marginal_x_y = NULL,
  marginal_y_x = NULL,
  marginal_col = NULL,
  marginal_alpha = .333,
  marginal_size = 10,
  legend = NULL,
  legend_title = NULL,
  legend_trace = TRUE,
  legend_xy = c(0, .98),
  legend_xanchor = "left",
  legend_yanchor = "auto",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  x_showspikes = FALSE,
  y_showspikes = FALSE,
  spikedash = "solid",
  spikemode = "across",
  spikesnap = "hovered data",
  spikecolor = NULL,
  spikethickness = 1,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  main_y = 1.01,
  main_yanchor = "bottom",
  subtitle_x = 0.02,
  subtitle_y = 0.99,
  subtitle_xref = "paper",
  subtitle_yref = "paper",
  subtitle_xanchor = "left",
  subtitle_yanchor = "top",
  automargin_x = TRUE,
  automargin_y = TRUE,
  xlim = NULL,
  ylim = NULL,
  axes_equal = FALSE,
  diagonal = FALSE,
  diagonal_col = NULL,
  diagonal_dash = "dot",
  diagonal_alpha = .66,
  fit_params = NULL,
  vline = NULL,
  vline_col = theme[["fg"]],
  vline_width = 1,
  vline_dash = "dot",
  hline = NULL,
  hline_col = theme[["fg"]],
  hline_width = 1,
  hline_dash = "dot",
  hovertext = NULL,
  width = NULL,
  height = NULL,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 0L
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  xname <- labelify(gsub(".*\\$", "", deparse(substitute(x))))
  yname <- labelify(gsub(".*\\$", "", deparse(substitute(y))))
  if (is.null(y) && NCOL(x) > 1) {
    if (is.null(xlab)) {
      xlab <- labelify(colnames(x)[1])
    }
    if (is.null(ylab)) {
      ylab <- labelify(colnames(x)[2])
    }
    y <- x[, 2]
    x <- x[, 1]
  }
  if (!is.null(fit)) {
    if (fit == "none") fit <- NULL
  } # easier to work with shiny
  if (is.logical(fit)) {
    if (fit) fit <- "GAM"
  }
  if (is.null(fit)) {
    se_fit <- FALSE
  }
  if (!is.null(fit)) {
    fit <- toupper(fit)
  }
  if (!is.null(main)) {
    main <- paste0("<b>", main, "</b>")
  }
  .mode <- mode
  .names <- group_names

  check_is_S7(theme, Theme)

  if (se_fit) {
    if (!fit %in% c("GLM", "LM", "LOESS", "GAM", "NW")) {
      warning(paste(
        "Standard error of the fit not available for",
        fit,
        "- try LM, LOESS, GAM, or NW"
      ))
      se_fit <- FALSE
    }
  }

  # order_on_x ----
  if (is.null(order_on_x)) {
    order_on_x <- if (!is.null(fit) || any(grepl("lines", mode))) {
      TRUE
    } else {
      FALSE
    }
  }

  # Cluster ----
  if (!is.null(cluster)) {
    group <- suppressWarnings(
      cluster(
        x = data.frame(x, y),
        algorithm = cluster,
        config = do_call(
          get_clust_setup_fn(cluster),
          cluster_config
        )
      )@clusters
    )
    group <- paste("Cluster", group)
  }

  # Data ----
  # xlab, ylab ----
  # The gsubs remove all text up to and including a "$" symbol if present
  if (is.null(xlab)) {
    if (is.list(x)) xlab <- "x" else xlab <- xname
  }
  if (!is.null(y) && is.null(ylab)) {
    if (is.list(y)) ylab <- "y" else ylab <- yname
  }

  # Group ----
  if (!is.null(group)) {
    group <- as.factor(group)
    x <- split(x, group, drop = TRUE)
    y <- split(y, group, drop = TRUE)
    if (is.null(group_names)) {
      group_names <- levels(droplevels(group))
    }
    names(x) <- names(y) <- .names <- group_names
    if (!is.null(hovertext)) hovertext <- split(hovertext, group, drop = TRUE)
  }

  # Try to get names from list or data frame inputs
  if (is.list(y) || NCOL(y) > 1) {
    if (is.null(.names) && !is.null(names(y))) .names <- names(y)
  }
  if (is.list(x) || NCOL(x) > 1) {
    if (is.null(.names) && !is.null(names(x))) .names <- names(x)
  }

  # Data to lists ----
  x <- if (!is.list(x)) as.list(as.data.frame(x)) else x
  y <- if (!is.null(y) && !is.list(y)) as.list(as.data.frame(y)) else y
  hovertext <- if (!is.null(hovertext) && !is.list(hovertext)) {
    as.list(as.data.frame(hovertext))
  } else {
    hovertext
  }
  if (length(x) == 1 && length(y) > 1) {
    x <- rep(x, length(y))
    .names <- names(y)
  }
  if (length(y) == 1 && length(x) > 1) {
    y <- rep(y, length(x))
    .names <- names(x)
  }
  if (!is.null(hovertext) && length(hovertext) == 1 && length(x) > 1) {
    hovertext <- rep(hovertext, length(x))
  }
  n_groups <- length(x)

  if (is.null(legend)) {
    legend <- if (n_groups == 1 && is.null(fit)) FALSE else TRUE
  }

  if (length(.mode) < n_groups) {
    .mode <- c(.mode, rep(tail(.mode)[1], n_groups - length(.mode)))
  }

  # if (is.null(legend)) legend <- n_groups > 1
  if (is.null(.names)) {
    if (n_groups > 1) {
      .names <- paste("Group", seq_len(n_groups))
    } else {
      # .names <- if (!is.null(fit)) fit else NULL
      .names <- xname
    }
  }

  # Marginal data ----
  if (show_marginal_x && is.null(marginal_x)) {
    marginal_x <- x
  }
  if (show_marginal_y && is.null(marginal_y)) {
    marginal_y <- y
  }

  # Reorder ----
  if (order_on_x) {
    index <- lapply(x, order)
    x <- lapply(seq(x), \(i) x[[i]][index[[i]]])
    y <- lapply(seq(x), \(i) y[[i]][index[[i]]])
    if (!is.null(hovertext)) {
      hovertext <- lapply(seq(x), \(i) hovertext[[i]][index[[i]]])
    }
  }

  # Colors ----
  col <- recycle(palette, seq_len(n_groups))
  if (is.null(alpha)) {
    alpha <- if (mode == "markers") {
      autoalpha(max(lengths(x)))
    } else {
      1
    }
  }

  # Theme ----
  if (diagonal) {
    if (is.null(diagonal_col)) {
      diagonal_col <- theme[["fg"]]
    }
    diagonal_col <- adjustcolor(diagonal_col, diagonal_alpha)
  }

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])
  if (!theme[["axes_visible"]]) {
    tick_col <- labs_col <- "transparent"
  }

  # marker_col, se_col ===
  if (is.null(marker_col)) {
    marker_col <- if (!is.null(fit) && n_groups == 1) {
      as.list(rep(theme[["fg"]], n_groups))
    } else {
      col
    }
  }

  if (!is.null(fit)) {
    if (is.null(fit_col)) fit_col <- col
  }

  if (se_fit && is.null(se_col)) {
    se_col <- col
  }

  if (is.null(legend_col)) {
    legend_col <- labs_col
  }
  if (is.null(spikecolor)) {
    spikecolor <- theme[["fg"]]
  }

  # Size ----
  # fitted & se_fit ----
  # If plotting se bands, need to include (fitted +/- se_times * se) in the axis limits
  if (se_fit) {
    se <- list()
  } else {
    se <- NULL
  }
  if (!is.null(fit)) {
    fitted <- list()
    fitted_text <- character()
    for (i in seq_len(n_groups)) {
      mod <- train(
        x = data.frame(x = x[[i]], y = y[[i]]),
        algorithm = fit,
        hyperparameters = fit_params,
        verbosity = verbosity - 1L
      )
      fitted[[i]] <- fitted(mod)
      if (se_fit) {
        se[[i]] <- se(mod)
      }
      if (include_fit_name) {
        # fitted_text[i] <- switch(fit,
        #   NLS = mod$extra$model,
        #   NLA = mod$mod$formula,
        #   fit
        # )
        fitted_text[i] <- fit
      } else {
        fitted_text[i] <- ""
      }
      if (rsq) {
        fitted_text[i] <- paste0(
          fitted_text[i],
          if (n_groups == 1) " (" else " ",
          "R<sup>2</sup> = ",
          ddSci(mod@metrics_training[["Rsq"]]),
          if (n_groups == 1) ")"
        )
      }
      # if (rsq_pval) {
      #   if (fit  %in% c("LM", "GLM")) {
      #     rsqp[[i]] <- paste0(ddSci(mod@metrics_training$Rsq), " (",
      #                          ddSci(summary(mod$mod)$coefficients[2, 4]), ")")
      #   } else if (fit == "GAM") {
      #     rsqp[[i]] <- paste0(ddSci(mod@metrics_training$Rsq), " (",
      #                          ddSci(summary(mod$mod)$s.pv), ")")
      #   }
      # }
    }
  }

  # Axes Limits ----
  if (axes_equal) {
    if (is.null(xlim)) {
      xlim <- getlim(unlist(x), "r", .06)
    }
    if (is.null(ylim)) {
      ylim <- getlim(unlist(y), "r", .06)
      if (is.list(fitted) && !is.list(se)) {
        ylim_hi <- max(unlist(fitted))
        ylim_lo <- min(unlist(fitted))
        ylim <- range(ylim_lo, ylim_hi, y)
      }
      if (is.list(se)) {
        ylim_hi <- max(unlist(lapply(
          seq_along(fitted),
          function(i) {
            as.data.frame(fitted[[i]]) +
              se_times * as.data.frame(se[[i]])
          }
        )))
        ylim_lo <- min(unlist(lapply(
          seq_along(fitted),
          function(i) {
            as.data.frame(fitted[[i]]) -
              se_times * as.data.frame(se[[i]])
          }
        )))
        ylim <- range(ylim_lo, ylim_hi, y)
      }
    }

    xlim <- ylim <- range(xlim, ylim)
  } # /axes_equal

  # unlist will coerce Dates to numeric, also don't want padding
  if (is.null(xlim) && !inherits(x[[1]], "Date")) {
    xlim <- getlim(unlist(x), "r", .06)
  }
  if (is.null(ylim) && !inherits(y[[1]], "Date")) {
    ylim <- getlim(unlist(y), "r", .06)
  }

  # plotly ----
  if (!is.null(fit) && rsq) {
    if (!include_fit_name) {
      fitted_text <- gsub("^ ", "", fitted_text)
    }
    if (n_groups > 1) {
      .names <- paste0(.names, " (", fitted_text, ")")
    }
  }

  plt <- plotly::plot_ly(
    width = width,
    height = height
  )

  if (diagonal) {
    lo <- min(xlim[1], ylim[1])
    hi <- max(xlim[2], ylim[2])
    plt <- plotly::layout(
      plt,
      shapes = list(
        type = "line",
        x0 = lo,
        x1 = hi,
        y0 = lo,
        y1 = hi,
        line = list(
          color = diagonal_col,
          dash = diagonal_dash
        )
      )
    )
  }

  for (i in seq_len(n_groups)) {
    ## { Scatter } ----
    marker <- if (grepl("markers", .mode[i])) {
      list(
        color = plotly::toRGB(marker_col[[i]], alpha = alpha),
        size = marker_size,
        symbol = symbol
      )
    } else {
      NULL
    }
    plt <- plotly::add_trace(
      plt,
      x = x[[i]],
      y = y[[i]],
      type = scatter_type,
      mode = .mode[i],
      # fillcolor = plotly::toRGB(col[[i]], alpha),
      name = .names[i],
      # text = .text[[i]],
      # hoverinfo = "text",
      text = hovertext[[i]],
      marker = marker,
      line = if (grepl("lines", .mode[i])) {
        list(
          color = plotly::toRGB(marker_col[[i]], alpha = alpha),
          shape = line_shape
        )
      } else {
        NULL
      },
      legendgroup = if (legend_trace) {
        .names[i]
      } else {
        paste0(.names[i], "_marker")
      },
      showlegend = legend && legend_trace
    )
    # Marginal plots ----
    # Add marginal plots by plotting short vertical markers on the x and y axes
    if (show_marginal_x) {
      if (is.null(marginal_col)) {
        marginal_col <- plotly::toRGB(marker_col, alpha = marginal_alpha)
      }
      if (is.null(marginal_x_y)) {
        marginal_x_y <- ylim[1]
      }
      # Extend ylim to include marginal markers
      ylim[1] <- ylim[1] - 0.02 * diff(ylim)
      for (i in seq_len(n_groups)) {
        plt <- plotly::add_trace(
          plt,
          x = marginal_x[[i]],
          y = rep(marginal_x_y, length(marginal_x[[i]])),
          type = "scatter",
          mode = "markers",
          marker = list(
            color = marginal_col[[i]],
            size = marginal_size,
            symbol = "line-ns-open"
          ),
          showlegend = FALSE,
          hoverinfo = "x"
        )
      }
    } # /show_marginal_x

    if (show_marginal_y) {
      if (is.null(marginal_col)) {
        marginal_col <- plotly::toRGB(marker_col, alpha = marginal_alpha)
      }
      if (is.null(marginal_y_x)) {
        marginal_y_x <- xlim[1]
      }
      # Extend xlim to include marginal markers
      xlim[1] <- xlim[1] - 0.02 * diff(xlim)
      for (i in seq_len(n_groups)) {
        plt <- plotly::add_trace(
          plt,
          x = rep(marginal_y_x, length(marginal_y[[i]])),
          y = marginal_y[[i]],
          type = "scatter",
          mode = "markers",
          marker = list(
            color = marginal_col[[i]],
            size = marginal_size,
            symbol = "line-ew-open"
          ),
          showlegend = FALSE,
          hoverinfo = "y"
          # legendgroup = .names[i]
        )
      }
    } # /show_marginal_y

    ## { SE band } ----
    if (se_fit) {
      plt <- plotly::add_trace(
        plt,
        x = x[[i]],
        y = fitted[[i]] + se_times * se[[i]],
        type = scatter_type,
        mode = "lines",
        line = list(color = "transparent"),
        legendgroup = .names[i],
        showlegend = FALSE,
        hoverinfo = "none",
        inherit = FALSE
      )
      plt <- plotly::add_trace(
        plt,
        x = x[[i]],
        y = fitted[[i]] - se_times * se[[i]],
        type = scatter_type,
        mode = "lines",
        fill = "tonexty",
        fillcolor = plotly::toRGB(se_col[[i]], alpha = se_alpha),
        line = list(color = "transparent"),
        # name = shade_name,
        legendgroup = .names[i],
        showlegend = FALSE,
        hoverinfo = "none",
        inherit = FALSE
      )
    }
    if (!is.null(fit)) {
      ##  { Fitted line } ----
      lfit <- list(
        color = plotly::toRGB(fit_col[[i]], alpha = fit_alpha),
        width = fit_lwd
      )
      plt <- plotly::add_trace(
        plt,
        x = x[[i]],
        y = fitted[[i]],
        type = scatter_type,
        mode = "lines",
        line = lfit,
        name = fitted_text[i],
        legendgroup = .names[i],
        showlegend = if (legend & n_groups == 1) TRUE else FALSE,
        inherit = FALSE
      )
    }
  }

  # Layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["tick_labels_col"]]
  )
  .legend <- list(
    title = list(
      text = legend_title,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = legend_col
      )
    ),
    x = legend_xy[1],
    xanchor = legend_xanchor,
    y = legend_xy[2],
    yanchor = legend_yanchor,
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = legend_col
    ),
    orientation = legend_orientation,
    bgcolor = plotly::toRGB(legend_bg),
    bordercolor = plotly::toRGB(legend_border_col),
    borderwidth = legend_borderwidth,
    tracegroupgap = legend_group_gap
  )

  zerocol <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]])
  plt <- plotly::layout(
    plt,
    yaxis = list(
      title = ylab,
      showline = FALSE,
      showspikes = y_showspikes,
      spikecolor = spikecolor,
      spikedash = spikedash,
      spikemode = spikemode,
      spikesnap = spikesnap,
      spikethickness = spikethickness,
      titlefont = f,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = theme[["zerolines"]],
      zerolinecolor = zerocol,
      zerolinewidth = theme[["zerolines_lwd"]],
      range = ylim,
      automargin = automargin_y
    ),
    xaxis = list(
      title = list(text = xlab),
      showline = FALSE,
      showspikes = x_showspikes,
      spikecolor = spikecolor,
      spikedash = spikedash,
      spikemode = spikemode,
      spikesnap = spikesnap,
      spikethickness = spikethickness,
      # mirror = axes_mirrored,
      titlefont = f,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = theme[["zerolines"]],
      zerolinecolor = zerocol,
      zerolinewidth = theme[["zerolines_lwd"]],
      range = xlim,
      automargin = automargin_x
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]],
      yref = "paper",
      y = main_y,
      yanchor = main_yanchor
    ),
    # titlefont = list(),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    showlegend = legend,
    legend = .legend
  ) # /layout

  ## vline ----
  if (!is.null(vline)) {
    plt <- plotly::layout(
      plt,
      shapes = plotly_vline(
        vline,
        color = vline_col,
        width = vline_width,
        dash = vline_dash
      )
    )
  }

  ## hline ----
  if (!is.null(hline)) {
    plt <- plotly::layout(
      plt,
      shapes = plotly_hline(
        hline,
        color = hline_col,
        width = hline_width,
        dash = hline_dash
      )
    )
  }

  ## square ----
  if (axes_square) {
    plt <- plt |>
      plotly::layout(
        yaxis = list(
          scaleanchor = "x",
          scaleratio = 1
        )
      )
  }

  # Subtitle ----
  # add annotation at top left with same font as main title
  if (!is.null(subtitle)) {
    plt <- plt |>
      plotly::add_annotations(
        x = subtitle_x,
        y = subtitle_y,
        xref = subtitle_xref,
        yref = subtitle_yref,
        xanchor = subtitle_xanchor,
        yanchor = subtitle_yanchor,
        text = subtitle,
        showarrow = FALSE,
        font = list(
          family = theme[["font_family"]],
          size = font_size,
          color = main_col
        )
      )
  }

  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    ),
    scrollZoom = scrollZoom
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_scatter


#' True vs. Predicted Plot
#'
#' A `draw_scatter` wrapper for plotting true vs. predicted values
#'
#' @inheritParams draw_scatter
#' @param x Numeric, vector/data.frame/list: True values. If y is NULL and
#' `NCOL(x) > 1`, first two columns used as `x` and `y`, respectively
#' @param y Numeric, vector/data.frame/list: Predicted values
#' @param ... Additional arguments passed to [draw_scatter]
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' x <- rnorm(500)
#' y <- x + rnorm(500)
#' draw_fit(x, y)
draw_fit <- function(
  x,
  y,
  xlab = "True",
  ylab = "Predicted",
  fit = "glm",
  se_fit = TRUE,
  axes_square = TRUE,
  axes_equal = TRUE,
  diagonal = TRUE,
  ...
) {
  draw_scatter(
    x,
    y,
    xlab = xlab,
    ylab = ylab,
    fit = fit,
    se_fit = se_fit,
    axes_square = axes_square,
    axes_equal = axes_equal,
    diagonal = diagonal,
    ...
  )
} # /rtemis::draw_fit


================================================
FILE: R/draw_spectrogram.R
================================================
# draw_spectrogram.R
# ::rtemis::
# 2023 EDG rtemis.org
# https://plotly.com/r/heatmaps/

#' Interactive Spectrogram
#'
#' Draw interactive spectrograms using `plotly`
#'
#' To set custom colors, use a minimum of `lo` and `hi`, optionally also
#' `lomid`, `mid`, `midhi` colors and set `colorscale = NULL`.
#'
#' @param x Numeric: Time.
#' @param y Numeric: Frequency.
#' @param z Numeric: Power.
#' @param colorgrad_n Integer: Number of colors in the gradient.
#' @param colors Character: Custom colors for the gradient.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param zlab Character: z-axis label.
#' @param hover_xlab Character: x-axis label for hover.
#' @param hover_ylab Character: y-axis label for hover.
#' @param hover_zlab Character: z-axis label for hover.
#' @param zmin Numeric: Minimum value for color scale.
#' @param zmax Numeric: Maximum value for color scale.
#' @param zauto Logical: If TRUE, automatically set zmin and zmax.
#' @param hoverlabel_align Character: Alignment of hover labels.
#' @param colorscale Character: Color scale.
#' @param colorbar_y Numeric: Y position of colorbar.
#' @param colorbar_yanchor Character: Y anchor of colorbar.
#' @param colorbar_xpad Numeric: X padding of colorbar.
#' @param colorbar_ypad Numeric: Y padding of colorbar.
#' @param colorbar_len Numeric: Length of colorbar.
#' @param colorbar_title_side Character: Side of colorbar title.
#' @param showgrid Logical: If TRUE, show grid.
#' @param space Character: Color space for gradient.
#' @param lo Character: Low color for gradient.
#' @param lomid Character: Low-mid color for gradient.
#' @param mid Character: Mid color for gradient.
#' @param midhi Character: Mid-high color for gradient.
#' @param hi Character: High color for gradient.
#' @param grid_gap Integer: Space between cells.
#' @param limits Numeric, length 2: Determine color range. Default = NULL, which automatically centers values around 0.
#' @param main Character: Main title.
#' @param key_title Character: Title of the key.
#' @param showticklabels Logical: If TRUE, show tick labels.
#' @param theme `Theme` object.
#' @param font_size Numeric: Font size.
#' @param padding Numeric: Padding between cells.
#' @param displayModeBar Logical: If TRUE, display the plotly mode bar.
#' @param modeBar_file_format Character: File format for image exports from the mode bar.
#' @param filename Character: Filename to save the plot. Default is NULL.
#' @param file_width Numeric: Width of exported image.
#' @param file_height Numeric: Height of exported image.
#' @param file_scale Numeric: Scale of exported image.
#' @param ... Additional arguments to be passed to `heatmaply::heatmaply`.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Example data
#' time <- seq(0, 10, length.out = 100)
#' freq <- seq(1, 100, length.out = 100)
#' power <- outer(time, freq, function(t, f) sin(t) * cos(f))
#'draw_spectrogram(
#'   x = time,
#'   y = freq,
#'   z = power
#' )
draw_spectrogram <- function(
  x,
  y,
  z,
  colorgrad_n = 101,
  colors = NULL,
  xlab = "Time",
  ylab = "Frequency",
  zlab = "Power",
  hover_xlab = xlab,
  hover_ylab = ylab,
  hover_zlab = zlab,
  zmin = NULL,
  zmax = NULL,
  zauto = TRUE,
  hoverlabel_align = "right",
  colorscale = "Jet",
  colorbar_y = .5,
  colorbar_yanchor = "middle",
  colorbar_xpad = 0,
  colorbar_ypad = 0,
  colorbar_len = .75,
  colorbar_title_side = "bottom",
  showgrid = FALSE,
  space = "rgb",
  lo = "#18A3AC",
  lomid = NULL,
  mid = NULL,
  midhi = NULL,
  hi = "#F48024",
  grid_gap = 0,
  limits = NULL,
  main = NULL,
  key_title = NULL,
  showticklabels = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  font_size = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Tick Labels ----
  if (is.null(showticklabels)) {
    showticklabels <- c(
      ifelse(NCOL(z) < 50, TRUE, FALSE),
      ifelse(NROW(z) < 50, TRUE, FALSE)
    )
  }

  if (is.null(font_size)) {
    font_size <- 17.0769 - 0.2692 * ncol(z)
  }

  # Limits ----
  if (is.null(limits)) {
    maxabs <- max(abs(z), na.rm = TRUE)
    if (.2 < maxabs && maxabs < 1) {
      maxabs <- 1
    }
    limits <- c(-maxabs, maxabs)
  }

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  fg <- plotly::toRGB(theme[["fg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  # tick_col <- plotly::toRGB(theme[["tick_col"]])
  tick_labels_col <- plotly::toRGB(theme[["tick_labels_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  # Colors ----
  if (is.null(mid)) {
    mid <- theme[["bg"]]
  }
  colors <- colorgrad(
    n = colorgrad_n,
    colors = colors,
    space = space,
    lo = lo,
    lomid = lomid,
    mid = mid,
    midhi = midhi,
    hi = hi
  )

  # Plot ----
  plt <- plotly::plot_ly()
  plt <- plt |>
    plotly::add_trace(
      x = x,
      y = y,
      z = z,
      type = "heatmap",
      zauto = zauto,
      zmin = zmin,
      zmax = zmax,
      colorscale = colorscale,
      colors = colors,
      hovertemplate = paste0(
        hover_xlab,
        ":<b> %{x:.3f}</b><br>",
        hover_ylab,
        ":<b> %{y:.3f}</b><br>",
        hover_zlab,
        ":<b> %{z:.3f}</b><extra></extra>"
      ),
      showlegend = FALSE
    )

  # Layout ----
  # '- layout ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = tick_labels_col
  )
  .legend <- list(
    font = list(
      family = theme[["font_family"]],
      size = font_size,
      color = fg
    )
  )

  plt <- plotly::layout(
    plt,
    yaxis = list(
      title = list(
        text = ylab,
        font = f
      ),
      titlefont = f,
      showgrid = showgrid,
      tickcolor = bg,
      showline = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickfont = tickfont
    ),
    xaxis = list(
      title = list(
        text = xlab,
        font = f
      ),
      titlefont = f,
      showgrid = showgrid,
      tickcolor = bg,
      showline = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickfont = tickfont
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    legend = .legend,
    hoverlabel = list(align = hoverlabel_align)
  )

  # Manual theme colors

  ## y axis tick label colors
  # plt[["x"]][["layoutAttrs"]][[2]][["yaxis2"]][["tickfont"]][["color"]]
  ## x axis tick label colors
  # plt[["x"]][["layoutAttrs"]][[2]][["xaxis"]][["tickfont"]][["color"]] <- "rgba(255, 0, 0, 1)"
  ## edge lines must be invisible
  plt[["x"]][["layout"]][["yaxis"]][["linecolor"]] <- plt[["x"]][["layout"]][[
    "xaxis2"
  ]][["linecolor"]] <- theme[["bg"]]

  # Manual layout ----
  # Set padding
  plt[["sizingPolicy"]][["padding"]] <- padding

  # Colorbar ----
  # https://plotly.com/r/reference/#scatter-marker-colorbar
  plt <- plt |>
    plotly::colorbar(
      y = colorbar_y,
      yanchor = colorbar_yanchor,
      title = list(
        text = zlab,
        font = f,
        side = colorbar_title_side
      ),
      tickfont = tickfont,
      xpad = colorbar_xpad,
      ypad = colorbar_ypad,
      len = colorbar_len
    )

  # Config ----
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_spectrogram


================================================
FILE: R/draw_survfit.R
================================================
# draw_survfit.R
# ::rtemis::
# 2025 EDG rtemis.org

# draw_scatter(time, survival_prob, mode = "lines", line_shape = "hv")
# ?median lines, error bands, nrisk_table

#' Draw a survfit object
#'
#' Draw a `survfit` object using [draw_scatter].
#'
#' @inheritParams draw_scatter
#'
#' @param x `survfit` object created by [survival::survfit].
#' @param mode Character, vector: "markers", "lines", "markers+lines".
# @param plot_median Logical: If `TRUE`, draw line(s) at 50% survival.
#' @param xlim Numeric vector of length 2: x-axis limits.
#' @param ylim Numeric vector of length 2: y-axis limits.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param main Character: Main title.
#' @param symbol Character: Symbol to use for the points.
#' @param nrisk_table Logical: If `TRUE`, subplot a table of the number at risk at each time point.
#' @param ... Additional arguments passed to [draw_scatter].
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # Get the lung dataset
#' data(cancer, package = "survival")
#' sf1 <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung)
#' draw_survfit(sf1)
#' sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung)
#' draw_survfit(sf2)
#' # with N at risk table
#' draw_survfit(sf2)
draw_survfit <- function(
  x,
  # plot_median = TRUE,
  mode = "lines",
  symbol = "cross",
  line_shape = "hv",
  xlim = NULL,
  ylim = NULL,
  xlab = "Time",
  ylab = "Survival",
  main = NULL,
  legend_xy = c(1, 1),
  legend_xanchor = "right",
  legend_yanchor = "top",
  theme = choose_theme(getOption("rtemis_theme")),
  nrisk_table = FALSE,
  filename = NULL,
  ...
) {
  # Checks ----
  check_inherits(x, "survfit")

  # Data ----
  nstrata <- if (is.null(x[["strata"]])) {
    1
  } else {
    length(x[["strata"]])
  }

  if (nstrata > 1) {
    .group <- unlist(sapply(seq_len(nstrata), function(i) {
      rep(i, x[["strata"]][i])
    }))
  } else {
    .group <- rep(1, length(x[["time"]]))
  }

  # Limits ----
  if (is.null(xlim)) {
    xlim <- c(0, max(x[["time"]], na.rm = TRUE))
  }
  if (is.null(ylim)) {
    ylim <- c(0, 1)
  }

  # Plot ----
  draw_scatter(
    x = split(x[["time"]], .group),
    y = split(x[["surv"]], .group),
    xlim = xlim,
    ylim = ylim,
    xlab = xlab,
    ylab = ylab,
    main = main,
    theme = theme,
    mode = mode,
    symbol = symbol,
    line_shape = line_shape,
    filename = filename,
    legend_xy = legend_xy,
    legend_xanchor = legend_xanchor,
    legend_yanchor = legend_yanchor,
    ...
  )
} # /rtemis::draw_survfit


================================================
FILE: R/draw_table.R
================================================
# draw_table.R
# ::rtemis::
# 2019 EDG rtemis.org

#' Simple HTML table
#'
#' Draw an html table using `plotly`
#'
#' @param x data.frame: Table to draw
#' @param .ddSci Logical: If TRUE, apply [ddSci] to numeric columns.
#' @param main Character: Table tile.
#' @param main_col Color: Title color.
#' @param main_x Float \[0, 1\]: Align title: 0: left, .5: center, 1: right.
#' @param main_xanchor Character: "auto", "left", "right": plotly's layout xanchor for
#' title.
#' @param fill_col Color: Used to fill header with column names and first column with
#' row names.
#' @param table_bg Color: Table background.
#' @param bg Color: Background.
#' @param line_col Color: Line color.
#' @param lwd Float: Line width.
#' @param header_font_col Color: Header font color.
#' @param table_font_col Color: Table font color.
#' @param font_size Integer: Font size.
#' @param font_family Character: Font family.
#' @param margin List: plotly's margins.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#'  df <- data.frame(
#'    Name = c("Alice", "Bob", "Charlie"),
#'    Age = c(25, 30, 35),
#'    Score = c(90.5, 85.0, 88.0)
#' )
#' p <- draw_table(
#'   df,
#'   main = "Sample Table",
#'   main_col = "#00b2b2"
#' )
draw_table <- function(
  x,
  .ddSci = TRUE,
  main = NULL,
  main_col = "black",
  main_x = 0,
  main_xanchor = "auto",
  fill_col = "#18A3AC",
  table_bg = "white",
  bg = "white",
  line_col = "white",
  lwd = 1,
  header_font_col = "white",
  table_font_col = "gray20",
  font_size = 14,
  font_family = "Helvetica Neue",
  margin = list(
    l = 0,
    r = 5,
    t = 30,
    b = 0,
    pad = 0
  )
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Input ----
  x <- as.data.frame(x)
  if (.ddSci) {
    # x <- dplyr::mutate_if(x, is.numeric, ddSci)
    # Lose the dep:
    x <- data.frame(lapply(x, function(x) if (is.numeric(x)) ddSci(x) else x))
  }

  # Colnames ----
  if (!is.null(colnames(x))) {
    colnames(x) <- paste0("<b>", colnames(x), "</b>")
  }

  # Rownames ----
  if (!is.null(rownames(x))) {
    rownames(x) <- paste0("<b>", rownames(x), "</b>")
  }

  # plotly ----

  plt <- plotly::plot_ly(x)
  plt <- plotly::add_table(
    plt,
    header = list(
      line = list(
        width = lwd,
        color = c(
          "rgba(255,255,255,0)",
          plotly::toRGB(line_col)
        )
      ),
      fill = list(
        color = c(
          "rgba(255,255,255,0)",
          plotly::toRGB(fill_col)
        )
      ),
      align = c("right", "center"),
      font = list(
        color = plotly::toRGB(header_font_col),
        family = font_family,
        size = font_size
      )
    ),
    cells = list(
      line = list(
        width = lwd,
        color = c(
          plotly::toRGB(line_col),
          plotly::toRGB(fill_col)
        )
      ),
      fill = list(
        color = c(
          plotly::toRGB(fill_col),
          plotly::toRGB(table_bg)
        )
      ),
      align = c("right", "center"),
      font = list(
        color = c(
          plotly::toRGB(header_font_col),
          plotly::toRGB(table_font_col)
        ),
        family = font_family,
        size = font_size
      )
    )
  )

  # layout ----
  main <- paste0("<b>", main, "</b>")
  plt <- plotly::layout(
    plt,
    title = list(
      text = main,
      font = list(
        family = font_family,
        size = font_size,
        color = main_col
      ),
      x = main_x,
      xanchor = main_xanchor
    ),
    paper_bgcolor = plotly::toRGB(bg),
    margin = margin
  )

  plt
} # /rtemis::draw_table


================================================
FILE: R/draw_ts.R
================================================
# draw_ts.R
# ::rtemis::
# 2022 EDG rtemis.org

# => recalc limits for fn = "sum"

#' Interactive Timeseries Plots
#'
#' Draw interactive timeseries plots using `plotly`
#'
#' @param x Numeric vector of values to plot or list of vectors
#' @param time Numeric or Date vector of time corresponding to values of `x`
#' @param window Integer: apply `roll_fn` over this many units of time
#' @param group Factor defining groups
#' @param roll_fn Character: "mean", "median", "max", or "sum": Function to apply on
#' rolling windows of `x`
#' @param roll_col Color for rolling line
#' @param roll_alpha Numeric: transparency for rolling line
#' @param roll_lwd Numeric: width of rolling line
#' @param roll_name Rolling function name (for annotation)
#' @param alpha Numeric \[0, 1\]: Transparency
#' @param align Character: "center", "right", or "left"
#' @param group_names Character vector of group names
#' @param xlab Character: x-axis label
#' @param n_xticks Integer: number of x-axis ticks to use (approximately)
#  @param tickmode
#' @param scatter_type Character: "scatter" or "lines"
#' @param legend Logical: If TRUE, show legend
#' @param x_showspikes Logical: If TRUE, show x-axis spikes on hover
#' @param y_showspikes Logical: If TRUE, show y-axis spikes on hover
#' @param spikedash Character: dash type string ("solid", "dot", "dash",
#' "longdash", "dashdot", or "longdashdot") or a dash length list in px
#' (eg "5px,10px,2px,2px")
#' @param displayModeBar Logical: If TRUE, display plotly's modebar
#' @param theme `Theme` object.
#' @param palette Character: palette name, or list of colors
#' @param filename Character: Path to filename to save plot
#' @param spikemode Character: If "toaxis", spike line is drawn from the data
#' point to the axis the series is plotted on. If "across", the line is drawn
#' across the entire plot area, and supercedes "toaxis". If "marker", then a
#' marker dot is drawn on the axis the series is plotted on
#' @param spikesnap Character: "data", "cursor", "hovered data". Determines
#' whether spikelines are stuck to the cursor or to the closest datapoints.
#' @param spikecolor Color for spike lines
#' @param spikethickness Numeric: spike line thickness
#' @param modeBar_file_format Character: modeBar image export file format
#' @param file_width Numeric: image export width
#' @param file_height Numeric: image export height
#' @param file_scale Numeric: image export scale
#' @param ... Additional arguments to be passed to [draw_scatter]
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' time <- sample(seq(as.Date("2020-03-01"), as.Date("2020-09-23"), length.out = 140))
#' x1 <- rnorm(140)
#' x2 <- rnorm(140, 1, 1.2)
#' # Single timeseries
#' draw_ts(x1, time)
#' # Multiple timeseries input as list
#' draw_ts(list(Alpha = x1, Beta = x2), time)
#' # Multiple timeseries grouped by group, different lengths
#' time1 <- sample(seq(as.Date("2020-03-01"), as.Date("2020-07-23"), length.out = 100))
#' time2 <- sample(seq(as.Date("2020-05-01"), as.Date("2020-09-23"), length.out = 140))
#' time <- c(time1, time2)
#' x <- c(rnorm(100), rnorm(140, 1, 1.5))
#' group <- c(rep("Alpha", 100), rep("Beta", 140))
#' draw_ts(x, time, 7, group)
draw_ts <- function(
  x,
  time,
  window = 7L,
  group = NULL,
  roll_fn = c("mean", "median", "max", "none"),
  roll_col = NULL,
  roll_alpha = 1,
  roll_lwd = 2,
  roll_name = NULL,
  alpha = NULL,
  align = "center",
  group_names = NULL,
  xlab = "Time",
  n_xticks = 12,
  #   tickmode = "array",
  scatter_type = "scatter",
  legend = TRUE,
  x_showspikes = TRUE,
  y_showspikes = FALSE,
  spikedash = "solid",
  spikemode = "across",
  spikesnap = "hovered data",
  spikecolor = NULL,
  spikethickness = 1,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
) {
  # Arguments ----
  roll_fn <- match.arg(roll_fn)
  if (roll_fn == "none") {
    window <- NULL
  }

  # Timeseries ----
  if (!is.null(group)) {
    x <- split(x, group)
    time <- split(time, group)
  }

  if (is.data.frame(x)) {
    x <- as.list(x)
  }

  if (!is.list(x)) {
    x <- list(x)
  }

  if (is.data.frame(time)) {
    time <- as.list(time)
  }

  if (!is.list(time)) {
    time <- list(time)
  }

  if (is.null(group_names)) {
    group_names <- if (!is.null(names(x))) {
      names(x)
    } else {
      paste("Group", seq_along(x))
    }
  }

  idx <- lapply(time, order)
  time <- lapply(seq_along(time), \(i) time[[i]][idx[[i]]])
  if (length(time) < length(x)) {
    time <- rep(time, length(x) / length(time))
    idx <- rep(idx, length(x) / length(idx))
  }
  x <- lapply(seq_along(x), \(i) x[[i]][idx[[i]]])
  # xtl <- lapply(seq_along(x), \(i) zoo::zoo(x[[i]], time[[i]]))

  if (!is.null(window) && window > 0) {
    avg_line <- switch(
      roll_fn,
      mean = lapply(
        x,
        \(xt) data.table::frollmean(xt, n = window, align = align)
      ),
      median = lapply(
        x,
        \(xt) data.table::frollapply(xt, n = window, median, align = align)
      ),
      max = lapply(
        x,
        \(xt) data.table::frollapply(xt, n = window, max, align = align)
      ),
      sum = lapply(x, \(xt) data.table::frollsum(xt, n = window, align = align))
    )
  }

  # Palette ----
  if (is.character(palette)) {
    palette <- get_palette(palette)
  }
  if (is.null(roll_col)) {
    roll_col <- palette[seq_along(x)]
  }

  # draw_scatter ----
  plt <- draw_scatter(
    time,
    x,
    xlab = xlab,
    theme = theme,
    palette = palette,
    alpha = alpha,
    group_names = group_names,
    legend = legend,
    scatter_type = scatter_type,
    x_showspikes = x_showspikes,
    y_showspikes = y_showspikes,
    spikedash = spikedash,
    spikemode = spikemode,
    spikesnap = spikesnap,
    spikecolor = spikecolor,
    spikethickness = spikethickness,
    ...
  )

  # Rolling function line ----
  if (is.null(roll_name)) {
    roll_name <- paste0("Rolling ", roll_fn, " (window=", window, ")")
  }

  if (!is.null(window)) {
    for (i in seq_along(x)) {
      plt <- plt |>
        plotly::add_trace(
          x = time[[i]],
          y = avg_line[[i]],
          type = "scatter",
          mode = "lines",
          line = list(
            color = plotly::toRGB(roll_col[[i]], alpha = roll_alpha),
            width = roll_lwd
          ),
          name = roll_name
        )
    }
  }

  # Config
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_ts


================================================
FILE: R/draw_varimp.R
================================================
# draw_varimp.R
# ::rtemis::
# 2017 EDG rtemis.org

#' Interactive Variable Importance Plot
#'
#' Plot variable importance using `plotly`
#'
#' A simple `plotly` wrapper to plot horizontal barplots, sorted by value,
#' which can be used to visualize variable importance, model coefficients, etc.
#'
#' @param x Numeric vector (or coercible to numeric): Input.
#' @param names Vector, string: Names of features.
#' @param main Character: Main title.
#' @param type Character: "bar" or "line".
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param plot_top Integer: Plot this many top features.
#' @param orientation Character: "h" or "v".
#' @param line_width Numeric: Line width.
#' @param labelify Logical: If TRUE, labelify feature names.
#' @param alpha Numeric: Transparency.
#' @param palette Character vector: Colors to use.
#' @param mar Vector, numeric, length 4: Plot margins in pixels (NOT inches).
#' @param font_size Integer: Overall font size to use (essentially for the
#' title at this point).
#' @param axis_font_size Integer: Font size to use for axis labels and tick labels.
#' @param theme `Theme` object.
#' @param showlegend Logical: If TRUE, show legend.
#' @param filename Character: Path to save the plot image.
#' @param file_width Numeric: Width of the saved plot image.
#' @param file_height Numeric: Height of the saved plot image.
#' @param file_scale Numeric: Scale of the saved plot image.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' # synthetic data
#' x <- rnorm(10)
#' names(x) <- paste0("Feature_", seq(x))
#' draw_varimp(x)
#' draw_varimp(x, orientation = "h")
draw_varimp <- function(
  x,
  names = NULL,
  main = NULL,
  type = c("bar", "line"),
  xlab = NULL,
  ylab = NULL,
  plot_top = 1, # 1 or less means plot this percent
  orientation = "v",
  line_width = 12,
  labelify = TRUE,
  alpha = 1,
  palette = get_palette(getOption("rtemis_palette")),
  mar = NULL,
  font_size = 16,
  axis_font_size = 14,
  theme = choose_theme(getOption("rtemis_theme")),
  showlegend = TRUE,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
) {
  # Dependencies ----
  check_dependencies("plotly")

  # Arguments ----
  type <- match.arg(type)
  if (is.null(mar)) {
    mar <- if (is.null(main)) c(20, 20, 20, 20) else c(20, 20, 40, 20)
  }

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])

  ## Axis font ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )

  ## Tick font ----
  tickfont <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = theme[["tick_labels_col"]]
  )

  # Data ----
  if (NCOL(x) > 1 && NROW(x) > 1) {
    cli::cli_abort("x must be a vector or single row or column")
  }

  ## Names ----
  if (is.null(names)) {
    if (is.null(names(x))) {
      .names <- if (NCOL(x) == 1) {
        labelify(rownames(x))
      } else {
        labelify(colnames(x))
      }
    } else {
      .names <- labelify(names(x))
    }
  } else {
    .names <- labelify(names)
  }

  x <- as.numeric(x)
  if (length(.names) == 0) {
    .names <- paste("Feature", seq_along(x))
  }

  ## Index ----
  index <- if (plot_top <= 1) {
    order(abs(x))[(length(x) - plot_top * length(x)):length(x)]
  } else {
    if (plot_top > length(x)) {
      plot_top <- length(x)
    }
    order(abs(x))[(length(x) - plot_top + 1):length(x)]
  }
  x <- x[index]
  .names <- .names[index]
  # reorder to arrange negative to positive
  index <- order(x)
  x <- x[index]
  .names <- .names[index]
  y <- factor(.names, levels = .names)

  # Colors ----
  col <- palette[[1]]
  col <- color_adjust(col, alpha = alpha)

  # plotly ----
  if (type == "bar") {
    plt <- plotly::plot_ly(
      x = if (orientation == "h") x else y,
      y = if (orientation == "h") y else x,
      type = "bar",
      marker = list(
        color = col,
        line = list(width = NULL)
      ),
      showlegend = FALSE
    )
  } else {
    # Plot each x[i] value as a line segment from 0 to x[i]
    plt <- plotly::plot_ly()
    for (i in seq_along(x)) {
      plt <- plotly::add_trace(
        plt,
        x = if (orientation == "h") c(0, x[i]) else c(y[i], y[i]),
        y = if (orientation == "h") c(y[i], y[i]) else c(0, x[i]),
        type = "scatter",
        mode = "lines",
        line = list(color = col, width = line_width),
        name = .names[i],
        showlegend = FALSE,
        # Show "_name[i]: value" on hover
        hoverinfo = "text",
        hovertext = paste0(.names[i], ": ", ddSci(x[i]))
      )
    }
  }

  # Layout ----
  if (is.null(xlab)) {
    xlab <- if (orientation == "h") "Variable Importance" else ""
  }
  if (is.null(ylab)) {
    ylab <- if (orientation == "h") "" else "Variable Importance"
  }
  plt <- plotly::layout(
    plt,
    margin = list(
      b = mar[1],
      l = mar[2],
      t = mar[3],
      r = mar[4],
      pad = 0
    ), # inner plot area padding
    xaxis = list(
      title = list(
        text = xlab,
        font = f
      ),
      # showline = axes_visible,
      # mirror = axes_mirrored,
      showgrid = FALSE,
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = FALSE
    ),
    yaxis = list(
      title = list(
        text = ylab,
        font = f
      ),
      # showline = axes_visible,
      # mirror = axes_mirrored,
      showgrid = theme[["grid"]],
      # gridcolor = grid_col,
      # gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tickfont,
      zeroline = FALSE
    ),
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]]
    ),
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg
  )

  # Remove padding
  plt[["sizingPolicy"]][["padding"]] <- 0

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # draw_varimp


================================================
FILE: R/draw_volcano.R
================================================
# draw_volcano
# ::rtemis::
# 2022 EDG rtemis.org
# allow custom grouping

# References
# https://github.com/plotly/plotly.js/blob/master/src/plot_api/plot_config.js

#' Volcano Plot
#'
#' @param x Numeric vector: Input values, e.g. log2 fold change, coefficients, etc.
#' @param pvals Numeric vector: p-values.
#' @param xnames Character vector: `x` names.
#' @param group Optional factor: Used to color code points. If NULL, significant points
#' below `x_thresh`, non-significant points, and significant points
#' above `x_thresh` will be plotted with the first, second and third
#' color of `palette`.
#' @param x_thresh Numeric x-axis threshold separating low from high.
#' @param p_thresh Numeric: p-value threshold of significance.
#' @param p_adjust_method Character: p-value adjustment method.
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none".
#' Default = "holm". Use "none" for raw p-values.
#' @param p_transform function.
#' @param legend Logical: If TRUE, show legend. Will default to FALSE, if
#' `group = NULL`, otherwise to TRUE.
#' @param legend_lo Character: Legend to annotate significant points below the
#' `x_thresh`.
#' @param legend_hi Character: Legend to annotate significant points above the
#' `x_thresh`.
#' @param label_lo Character: label for low values.
#' @param label_hi Character: label for high values.
#' @param main Character: Main title.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param margin Named list of plot margins.
#' Default = `list(b = 65, l = 65, t = 50, r = 10, pad = 0)`.
#' @param xlim Numeric vector, length 2: x-axis limits.
#' @param ylim Numeric vector, length 2: y-axis limits.
#' @param alpha Numeric: point transparency.
#' @param hline Numeric: If defined, draw a horizontal line at this y value.
#' @param hline_col Color for `hline`.
#' @param hline_width Numeric: Width for `hline`.
#' @param hline_dash Character: Type of line to draw: "solid", "dot", "dash",
#' "longdash", "dashdot", or "longdashdot".
#' @param hline_annotate Character: Text of horizontal line annotation if
#' `hline` is set.
#' @param hline_annotation_x Numeric: x position to place annotation with paper
#' as reference. 0: to the left of the plot area; 1: to the right of the plot area.
#' @param annotate Logical: If TRUE, annotate significant points.
#' @param annotate_col Color for annotations.
#' @param theme `Theme` object.
#' @param font_size Integer: Font size.
#' @param palette Character vector: Colors to use. If `group` is NULL, the first, second and third
#' colors will be used for significant points with negative coefficients, non-significant points, and
#' significant points with positive coefficients, respectively. If `group` is not NULL, colors will
#' be assigned to groups, in order of appearance.
#' @param legend_x_lo Numeric: x position of `legend_lo`.
#' @param legend_x_hi Numeric: x position of `legend_hi`.
#' @param legend_y Numeric: y position for `legend_lo` and `legend_hi`.
#' @param annotate_n Integer: Number of significant points to annotate.
#' @param ax_lo Numeric: Sets the x component of the arrow tail about the arrow head for
#' significant points below `x_thresh`.
#' @param ay_lo Numeric: Sets the y component of the arrow tail about the arrow head for
#' significant points below `x_thresh`.
#' @param ax_hi Numeric: Sets the x component of the arrow tail about the arrow head for
#' significant points above `x_thresh`.
#' @param ay_hi Numeric: Sets the y component of the arrow tail about the arrow head for
#' significant points above `x_thresh`.
#' @param annotate_alpha Numeric: Transparency for annotations.
#' @param hovertext Character vector: Text to display on hover.
#' @param displayModeBar Logical: If TRUE, display plotly mode bar.
#' @param filename Character: Path to save the plot image.
#' @param file_width Numeric: Width of the saved plot image.
#' @param file_height Numeric: Height of the saved plot image.
#' @param file_scale Numeric: Scale of the saved plot image.
#' @param verbosity Integer: Verbosity level.
#' @param ... Additional arguments passed to [draw_scatter].
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' set.seed(2019)
#' y <- rnormmat(500, 500, return_df = TRUE)
#' x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500))
#' mod <- massGLM(x, y)
#' draw_volcano(summary(mod)[["Coefficient_x"]], summary(mod)[["p_value_x"]])
draw_volcano <- function(
  x,
  pvals,
  xnames = NULL,
  group = NULL,
  x_thresh = 0,
  p_thresh = .05,
  p_adjust_method = c(
    "holm",
    "hochberg",
    "hommel",
    "bonferroni",
    "BH",
    "BY",
    "fdr",
    "none"
  ),
  p_transform = function(x) -log10(x),
  legend = NULL,
  legend_lo = NULL,
  legend_hi = NULL,
  label_lo = "Low",
  label_hi = "High",
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  xlim = NULL,
  ylim = NULL,
  alpha = NULL,
  hline = NULL,
  hline_col = NULL,
  hline_width = 1,
  hline_dash = "solid",
  hline_annotate = NULL,
  hline_annotation_x = 1,
  theme = choose_theme(getOption("rtemis_theme")),
  annotate = TRUE,
  annotate_col = theme[["labs_col"]],
  font_size = 16,
  palette = NULL,
  legend_x_lo = NULL,
  legend_x_hi = NULL,
  legend_y = .97,
  annotate_n = 7L,
  ax_lo = NULL, # 40,
  ay_lo = NULL,
  ax_hi = NULL, # -40,
  ay_hi = NULL,
  annotate_alpha = .7,
  hovertext = NULL,
  displayModeBar = "hover",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 1L,
  ...
) {
  xname <- deparse(substitute(x))
  p_adjust_method <- match.arg(p_adjust_method)
  filt <- !is.na(x) & !is.na(pvals)
  if (is.null(xnames)) {
    xnames <- names(x)
  } else {
    xnames <- xnames
  }
  xnames <- xnames[filt]
  if (!is.null(group)) {
    group <- group[filt]
  }
  x <- x[filt]
  pvals <- pvals[filt]
  if (is.null(xnames)) {
    xnames <- paste("Feature", seq_along(x))
  }
  if (is.null(legend)) {
    legend <- !is.null(group)
  }

  p_adjusted <- p.adjust(pvals, method = p_adjust_method)
  index_ltpthresh <- p_adjusted < p_thresh
  p_transformed <- p_transform(p_adjusted)
  if (is.null(xlab)) {
    xlab <- labelify(xname)
  }

  # Default to lo - ns - hi groups
  if (is.null(group)) {
    group <- rep("NS", length(pvals))
    group[index_ltpthresh & x < x_thresh] <- label_lo
    group[index_ltpthresh & x > x_thresh] <- label_hi
    group <- factor(group, levels = c(label_lo, "NS", label_hi))
    if (is.null(palette)) {
      palette <- list("#43A4AC", "#7f7f7f", "#FA9860")
    }
  }

  group.counts <- table(group)
  include <- group.counts > 0
  if (verbosity > 0L) {
    cat("Group counts:\n")
    print(group.counts)
  }

  # Colors for groups
  if (is.null(palette)) {
    palette <- get_palette(getOption("rtemis_palette", "rtms"))
  }

  # Theme ----
  check_is_S7(theme, Theme)

  # y-axis label ----
  if (is.null(ylab)) {
    ylab <- fn2label(p_transform, "p-value")
    if (p_adjust_method != "none") {
      ylab <- paste0(ylab, " (", p_adjust_method, "-corrected)")
    }
  }

  # Plot ----
  if (is.null(hovertext)) {
    hovertext <- xnames
  }
  plt <- draw_scatter(
    x,
    p_transformed,
    main = main,
    xlab = xlab,
    ylab = ylab,
    xlim = xlim,
    ylim = ylim,
    alpha = alpha,
    theme = theme,
    margin = margin,
    legend = legend,
    group = group,
    palette = palette[include],
    hovertext = hovertext,
    ...
  )

  # High - Low legend ----
  autolegend_x_lo <- is.null(legend_x_lo)
  if (autolegend_x_lo) {
    # legend.x.lo <- Filter(\(x) x < x.thresh, x) |> range() |> diff() * -.2 + x.thresh
    legend_x_lo <- x_thresh - abs(diff(c(x_thresh, min(x, na.rm = TRUE)))) * .2
  }

  autolegend_x_hi <- is.null(legend_x_hi)
  if (autolegend_x_hi) {
    # legend.x.hi <- Filter(\(x) x > x.thresh, x) |> range() |> diff() * .2 + x.thresh
    legend_x_hi <- x_thresh + abs(diff(c(x_thresh, max(x, na.rm = TRUE)))) * .2
  }

  legxdiff <- legend_x_hi - legend_x_lo

  if (autolegend_x_lo) {
    legend_x_lo <- x_thresh - legxdiff / 2
  }
  if (autolegend_x_hi) {
    legend_x_hi <- x_thresh + legxdiff / 2
  }

  if (group.counts[1] > 0 && !is.null(legend_lo)) {
    plt <- plt |>
      plotly::add_annotations(
        x = legend_x_lo,
        y = legend_y,
        text = legend_lo,
        xref = "x",
        yref = "paper",
        showarrow = FALSE,
        font = list(
          color = palette[[1]],
          family = theme[["font_family"]],
          size = font_size
        )
      )
  }

  if (group.counts[3] > 0 && !is.null(legend_hi)) {
    plt <- plt |>
      plotly::add_annotations(
        x = legend_x_hi,
        y = legend_y,
        text = legend_hi,
        xref = "x",
        yref = "paper",
        showarrow = FALSE,
        font = list(
          color = palette[[3]],
          family = theme[["font_family"]],
          size = font_size
        )
      )
  }

  # Annotations ----
  if (annotate) {
    index_ltxthresh <- x < x_thresh
    index_gtxthresh <- x > x_thresh

    index_lo <- index_ltpthresh & index_ltxthresh
    index_hi <- index_ltpthresh & index_gtxthresh
    annotate_n_lo <- annotate_n_hi <- annotate_n
    if (sum(index_lo) < annotate_n) {
      annotate_n_lo <- sum(index_lo)
    }
    if (sum(index_hi) < annotate_n) {
      annotate_n_hi <- sum(index_hi)
    }

    if (annotate_n_lo > 0) {
      lo_ord <- order(pvals[index_lo])
      lo_x <- x[index_lo][lo_ord[seq_len(annotate_n_lo)]]
      lo_pval <- p_transformed[index_lo][lo_ord[seq_len(annotate_n_lo)]]
      lo_name <- xnames[index_lo][lo_ord[seq_len(annotate_n_lo)]]

      if (is.null(ay_lo)) {
        if (is.null(ay_lo)) {
          ay_lo <- drange(order(lo_pval), 30, -30)
        }
      }
      if (is.null(ax_lo)) {
        ax_lo <- 5 + 5 * annotate_n_lo
      }
      plt <- plt |>
        plotly::add_annotations(
          x = lo_x,
          y = lo_pval,
          text = lo_name,
          arrowhead = 4,
          arrowcolor = adjustcolor(theme[["fg"]], .33),
          arrowsize = .5,
          arrowwidth = 1,
          ax = ax_lo,
          ay = ay_lo,
          xanchor = "left",
          font = list(
            size = 16,
            family = theme[["font_family"]],
            color = adjustcolor(theme[["fg"]], annotate_alpha)
          )
        )
    }

    # Annotate 10 most significant increasing
    if (annotate_n_hi > 0) {
      hi_ord <- order(pvals[index_ltpthresh & index_gtxthresh])
      hi_x <- x[index_ltpthresh & index_gtxthresh][hi_ord[seq_len(
        annotate_n_hi
      )]]
      hi_pval <- p_transformed[
        index_ltpthresh & index_gtxthresh
      ][hi_ord[seq_len(annotate_n_hi)]]
      hi_name <- xnames[index_ltpthresh & index_gtxthresh][hi_ord[seq_len(
        annotate_n_hi
      )]]

      if (is.null(ay_hi)) {
        ay_hi <- drange(order(hi_pval), 50, -50)
      }
      if (is.null(ax_hi)) {
        ax_hi <- -5 - 5 * annotate_n_hi
      }
      plt <- plt |>
        plotly::add_annotations(
          x = hi_x,
          y = hi_pval,
          text = hi_name,
          arrowhead = 4,
          arrowcolor = adjustcolor(theme[["fg"]], .33),
          arrowsize = .5,
          arrowwidth = 1,
          ax = ax_hi,
          ay = ay_hi,
          xanchor = "right",
          font = list(
            size = 16,
            family = theme[["font_family"]],
            color = adjustcolor(theme[["fg"]], annotate_alpha)
          )
        )
    }
  }

  # hline ----
  if (!is.null(hline)) {
    if (is.null(hline_col)) {
      hline_col <- theme[["fg"]]
    }
    hline_col <- recycle(hline_col, hline)
    hline_width <- recycle(hline_width, hline)
    hline_dash <- recycle(hline_dash, hline)
    hlinel <- lapply(seq_along(hline), function(i) {
      list(
        type = "line",
        x0 = 0,
        x1 = 1,
        xref = "paper",
        y0 = hline[i],
        y1 = hline[i],
        line = list(
          color = hline_col[i],
          width = hline_width[i],
          dash = hline_dash[i]
        )
      )
    })
    plt <- plotly::layout(plt, shapes = hlinel)

    # Annotate horizontal lines on the right border of the plot
    if (!is.null(hline_annotate)) {
      plt <- plt |>
        plotly::add_annotations(
          xref = "paper",
          yref = "y",
          xanchor = "right",
          yanchor = "bottom",
          x = hline_annotation_x,
          y = hline,
          text = hline_annotate,
          font = list(
            family = theme[["font_family"]],
            size = font_size,
            color = annotate_col
          ),
          showarrow = FALSE
        )
    }
  }

  # Config ----
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = "svg",
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  )

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  plt
} # /rtemis::draw_volcano


================================================
FILE: R/draw_xt.R
================================================
# draw_xt.R
# ::rtemis::
# 2024 EDG rtemis.org

# Multiple legends
# https://plotly.com/python/legend/#adding-multiple-legends
# https://plotly.com/r/legend/

#' Plot timeseries data
#'
#' @param x Datetime vector or list of vectors.
#' @param y Numeric vector or named list of vectors: y-axis data.
#' @param x2 Datetime vector or list of vectors, optional: must be provided if `y2` does not
#' correspond to values in `x`. A single x-axis will be drawn for all values in `x` and `x2`.
#' @param y2 Numeric vector, optional: If provided, a second y-axis will be added to the right
#' side of the plot.
#' @param which_xy Integer vector: Indices of `x` and `y` to plot.
#' If not provided, will select up to the first two x-y traces.
#' @param which_xy2 Integer vector: Indices of `x2` and `y2` to plot.
#' If not provided, will select up to the first two x2-y2 traces.
#' @param shade_bin Integer vector \{0, 1\}: Time points in `x` to shade on the plot. For example,
#' if there are 10 time points in `x`, and you want to shade time points 3 to 7,
#' `shade_bin = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 0)`. Only set `shade_bin` or `shade_interval`, not
#' both.
#' @param shade_interval List of numeric vectors: Intervals to shade on the plot. Only set
#' `shade_bin` or `shade_interval`, not both.
#' @param shade_col Color: Color to shade intervals.
#' @param shade_x Numeric vector: x-values to use for shading.
#' @param shade_name Character: Name for shaded intervals.
#' @param shade_showlegend Logical: If TRUE, show legend for shaded intervals.
#' @param ynames Character vector, optional: Names for each vector in `y`.
#' @param y2names Character vector, optional: Names for each vector in `y2`.
#' @param xlab Character: x-axis label.
#' @param ylab Character: y-axis label.
#' @param y2lab Character: y2-axis label.
#' @param xunits Character: x-axis units.
#' @param yunits Character: y-axis units.
#' @param y2units Character: y2-axis units.
#' @param yunits_col Color for y-axis units.
#' @param y2units_col Color for y2-axis units.
#' @param zt Numeric vector: Zeitgeber time. If provided, will be shown on the x-axis instead of
#' `x`. To be used only with a single `x` vector and no `x2`.
#' @param show_zt Logical: If TRUE, show zt on x-axis, if zt is provided.
#' @param show_zt_every Optional integer: Show zt every `show_zt_every` ticks. If NULL, will be
#' calculated to be `x_nticks` +/- 1 if `x_nticks` is not 0, otherwise 12 +/- 1.
#' @param zt_nticks Integer: Number of zt ticks to show. Only used if `show_zt_every` is NULL.
#' The actual number of ticks shown will depend on the periodicity of zt, so that zt = 0 is always
#' included.
#' @param main Character: Main title.
#' @param main_y Numeric: Y position of main title.
#' @param main_yanchor Character: "top", "middle", "bottom".
#' @param x_nticks Integer: Number of ticks on x-axis.
#' @param y_nticks Integer: Number of ticks on y-axis.
#' @param show_rangeslider Logical: If TRUE, show a range slider.
#' @param slider_start Numeric: Start of range slider.
#' @param slider_end Numeric: End of range slider.
#' @param theme `Theme` object.
#' @param palette Character vector: Colors to be used to draw each vector in `y` and `y2`, in order.
#' @param font_size Numeric: Font size for text.
#' @param yfill Character: Fill type for y-axis: "none", "tozeroy", "tonexty".
#' @param y2fill Character: Fill type for y2-axis: "none", "tozeroy", "tonexty".
#' @param fill_alpha Numeric: Fill opacity for y-axis.
#' @param yline_width Numeric: Line width for y-axis lines.
#' @param y2line_width Numeric: Line width for y2-axis lines.
#' @param x_showspikes Logical: If TRUE, show spikes on x-axis.
#' @param spike_dash Character: Dash type for spikes: "solid", "dot", "dash", "longdash",
#' "dashdot", "longdashdot".
#' @param spike_col Color for spikes.
#' @param x_spike_thickness Numeric: Thickness of spikes. `-2` avoids drawing border around spikes.
#' @param tickfont_size Numeric: Font size for tick labels.
#' @param x_tickmode Character: "auto", "linear", "array".
#' @param x_tickvals Numeric vector: Tick positions.
#' @param x_ticktext Character vector: Tick labels.
#' @param x_tickangle Numeric: Angle of tick labels.
#' @param legend_x Numeric: X position of legend.
#' @param legend_y Numeric: Y position of legend.
#' @param legend_xanchor Character: "left", "center", "right".
#' @param legend_yanchor Character: "top", "middle", "bottom".
#' @param legend_orientation Character: "v" for vertical, "h" for horizontal.
#' @param margin Named list with 4 numeric values: "l", "r", "t", "b" for left, right, top, bottom
#' margins.
#' @param x_standoff Numeric: Distance from x-axis to x-axis label.
#' @param y_standoff Numeric: Distance from y-axis to y-axis label.
#' @param y2_standoff Numeric: Distance from y2-axis to y2-axis label.
#' @param hovermode Character: "closest", "x", "x unified".
#' @param displayModeBar Logical: If TRUE, display plotly mode bar.
#' @param modeBar_file_format Character: "png", "svg", "jpeg", "webp", "pdf": file format for mode
#' bar image export.
#' @param scrollZoom Logical: If TRUE, enable zooming by scrolling.
#' @param filename Character: Path to save the plot image.
#' @param file_width Numeric: Width of the saved plot image.
#' @param file_height Numeric: Height of the saved plot image.
#' @param file_scale Numeric: Scale of the saved plot image.
#'
#' @return `plotly` object.
#'
#' @author EDG
#' @export
#'
#' @examplesIf interactive()
#' datetime <- seq(
#'   as.POSIXct("2020-01-01 00:00"),
#'   as.POSIXct("2020-01-02 00:00"),
#'   by = "hour"
#' )
#' df <- data.frame(
#'   datetime = datetime,
#'   value1 = rnorm(length(datetime)),
#'   value2 = rnorm(length(datetime))
#' )
#' draw_xt(df, x = df[, 1], y = df[, 2:3])
draw_xt <- function(
  x,
  y,
  x2 = NULL,
  y2 = NULL,
  which_xy = NULL,
  which_xy2 = NULL,
  # Shade intervals
  shade_bin = NULL,
  shade_interval = NULL,
  shade_col = NULL,
  shade_x = NULL,
  shade_name = "",
  shade_showlegend = FALSE,
  ynames = NULL,
  y2names = NULL,
  xlab = NULL,
  ylab = NULL,
  y2lab = NULL,
  xunits = NULL,
  yunits = NULL,
  y2units = NULL,
  yunits_col = NULL,
  y2units_col = NULL,
  zt = NULL,
  show_zt = TRUE,
  show_zt_every = NULL,
  zt_nticks = 18L,
  main = NULL,
  main_y = 1,
  main_yanchor = "bottom",
  x_nticks = 0,
  y_nticks = 0,
  show_rangeslider = NULL,
  slider_start = NULL,
  slider_end = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  font_size = 16,
  yfill = "none",
  y2fill = "none",
  fill_alpha = .2,
  yline_width = 2,
  y2line_width = 2,
  x_showspikes = TRUE,
  spike_dash = "solid",
  spike_col = NULL,
  x_spike_thickness = -2,
  tickfont_size = 16,
  x_tickmode = "auto",
  x_tickvals = NULL,
  x_ticktext = NULL,
  x_tickangle = NULL,
  # legend
  legend_x = 0,
  legend_y = 1.1,
  legend_xanchor = "left",
  legend_yanchor = "top",
  legend_orientation = "h",
  margin = list(l = 75, r = 75, b = 75, t = 75),
  # axis labels
  x_standoff = 20L,
  y_standoff = 20L,
  y2_standoff = 20L,
  hovermode = "x",
  # config
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  filename = NULL,
  file_width = 960,
  file_height = 500,
  file_scale = 1
) {
  # Names ----
  .xname <- labelify(gsub(".*\\$", "", deparse(substitute(x))))
  .x2name <- labelify(gsub(".*\\$", "", deparse(substitute(x2))))
  if (!is.null(x2) && .xname != .x2name) {
    .xname <- NULL
  }
  .yname <- labelify(gsub(".*\\$", "", deparse(substitute(y))))
  .y2name <- labelify(gsub(".*\\$", "", deparse(substitute(y2))))

  # Data ----

  # Data to lists
  if (!is.null(y2) && is.null(x2)) {
    x2 <- x
  }
  if (!is.list(x)) {
    x <- list(x)
  }
  if (!is.list(y)) {
    y <- list(y)
  }
  if (!is.null(y2) && !is.list(y2)) {
    y2 <- list(y2)
  }
  if (!is.null(y2) && !is.list(x2)) {
    x2 <- list(x2)
  }

  # Recycle x and x2 as needed
  if (length(y) > 1 && length(x) == 1) {
    x <- rep(x, length(y))
  }
  if (!is.null(y2) && length(y2) > 1 && length(x2) == 1) {
    x2 <- rep(x2, length(y2))
  }
  if (length(x) != length(y)) {
    cli::cli_abort("{.arg x} and {.arg y} must be the same length")
  }
  if (!is.null(y2) && length(x2) != length(y2)) {
    cli::cli_abort("{.arg x2} and {.arg y2} must be the same length")
  }

  # Which traces to plot ----
  # By default, plot up to two for each y axis
  if (is.null(which_xy)) {
    if (length(x) > 2) {
      x <- x[1:2]
      y <- y[1:2]
    }
  } else {
    x <- x[which_xy]
    y <- y[which_xy]
  }

  if (is.null(which_xy2)) {
    if (length(x2) > 2) {
      x2 <- x2[1:2]
      y2 <- y2[1:2]
    }
  } else {
    x2 <- x2[which_xy2]
    y2 <- y2[which_xy2]
  }

  # Rangeslider ----
  if (is.null(show_rangeslider)) {
    show_rangeslider <- length(x[[1]]) > 500
  }

  # Check args ----
  if (!is.null(shade_bin) && !is.null(shade_interval)) {
    cli::cli_abort(
      "Only set {.arg shade_bin} or {.arg shade_interval}, not both"
    )
  }

  # Names ----
  if (is.null(ynames)) {
    ynames <- if (is.null(names(y))) {
      if (length(y) > 1) {
        paste(.yname, seq_along(y), sep = "_")
      } else {
        .yname
      }
    } else {
      names(y)
    }
  }

  if (!is.null(y2) && is.null(y2names)) {
    y2names <- if (is.null(names(y2))) {
      if (length(y2) > 1) {
        paste(.y2name, seq_along(y2), sep = "_")
      } else {
        .y2name
      }
    } else {
      names(y2)
    }
  }

  # Add units
  if (!is.null(yunits)) {
    if (is.null(yunits_col)) {
      yunits_col <- if (length(y) == 1) {
        palette[[1]]
      } else {
        "#00ff00"
      }
    }
    yunits <- paste0(
      "(",
      '<span style="color:',
      yunits_col,
      ';">',
      yunits,
      "</span>",
      ")"
    )
    ynames <- paste(ynames, yunits)
  }
  if (!is.null(y2units)) {
    if (is.null(y2units_col)) {
      y2units_col <- if (length(y2) == 1) {
        palette[[length(x) + 1]]
      } else {
        "#ff0000"
      }
    }
    y2units <- paste0(
      "(",
      '<span style="color:',
      y2units_col,
      ';">',
      y2units,
      "</span>",
      ")"
    )
    y2names <- paste(y2names, y2units)
  }

  # Theme ----
  check_is_S7(theme, Theme)

  bg <- plotly::toRGB(theme[["bg"]])
  plot_bg <- plotly::toRGB(theme[["plot_bg"]])
  grid_col <- plotly::toRGB(theme[["grid_col"]], theme[["grid_alpha"]])
  tick_col <- plotly::toRGB(theme[["tick_col"]])
  legend_col <- labs_col <- plotly::toRGB(theme[["labs_col"]])
  main_col <- plotly::toRGB(theme[["main_col"]])
  if (!theme[["axes_visible"]]) {
    tick_col <- labs_col <- "transparent"
  }
  if (is.null(spike_col)) {
    spike_col <- theme[["fg"]]
  }
  zero_col <- adjustcolor(theme[["zerolines_col"]], theme[["zerolines_alpha"]])

  # Colors ----
  # if (is.null(line1.fill.col)) line1.fill.col <- plotly::toRGB(line1.col, alpha = 0.4)
  # if (is.null(line2.fill.col) && !is.null(y2)) {
  #   line2.fill.col <- plotly::toRGB(line2.col, alpha = 0.4)
  # }
  palette_y <- palette[seq_along(y)]
  palette_y2 <- palette[length(y) + seq_along(y2)]

  if (length(y) > 1 && length(yfill) == 1) {
    yfill <- rep(yfill, length(y))
  }
  stopifnot(length(yfill) == length(y))

  if (length(y2) > 1 && length(y2fill) == 1) {
    y2fill <- rep(y2fill, length(y2))
  }

  if (!is.null(y2)) {
    stopifnot(length(y2fill) == length(y2))
  }

  # Fonts ----
  f <- list(
    family = theme[["font_family"]],
    size = font_size,
    color = labs_col
  )
  tick_font <- list(
    family = theme[["font_family"]],
    size = tickfont_size,
    color = theme[["tick_labels_col"]]
  )

  # Calculate shade_interval from shade_bin ----
  if (!is.null(shade_bin)) {
    shade_bin_p <- c(0, shade_bin, 0)
    shade_bin_starts <- which(diff(shade_bin_p) == 1)
    shade_bin_ends <- which(diff(shade_bin_p) == -1)
    shade_interval <- lapply(
      seq_along(shade_bin_starts),
      \(i) c(shade_bin_starts[i], shade_bin_ends[i])
    )
  }

  # zt ----
  if (show_zt && !is.null(zt)) {
    x_tickmode <- "array"
    if (is.null(show_zt_every)) {
      # Get periodicity of ZT
      idi0 <- which(zt == 0)
      # Get differences between 0s
      diff_idi0 <- diff(idi0)[1]
      # Pick show.zt.every to be perfect divisor of diff_idi0 so that total length is closest to zt.nticks
      # a) diff_idi0 %% show.zt.every must be 0
      # b) length(zt) / show.zt.every must be closest to zt.nticks
      sze <- round(length(zt) / zt_nticks)
      i <- 0
      # if diff_idi0 %% sze != 0, search for closest integer above or below sze
      sze_high <- sze_low <- sze
      while (diff_idi0 %% sze_low != 0) {
        sze_low <- sze_low - 1
      }
      while (diff_idi0 %% sze_high != 0) {
        sze_high <- sze_high + 1
      }
      show_zt_every <- c(sze_low, sze_high)[which.min(abs(c(
        sze - sze_low,
        sze - sze_high
      )))]
    }
    idi <- seq(1, length(zt), by = show_zt_every)
    # Make sure 0 is included
    while (!0 %in% zt[idi]) {
      idi <- idi + 1
    }
    idi <- idi[idi <= length(zt)]
    x_tickvals <- x[[1]][idi]
    x_ticktext <- zt[idi]
    if (is.null(xlab)) xlab <- "ZT"
  }

  # Plot ----
  plt <- plotly::plot_ly(type = "scatter", mode = "lines")

  # Shade intervals ----
  if (!is.null(shade_interval)) {
    if (is.null(shade_x)) {
      shade_x <- x[[1]]
    }
    if (is.null(shade_col)) {
      shade_col <- plotly::toRGB(theme[["fg"]], 0.15)
    }
    ymax <- max(unlist(y), unlist(y2))
    # Draw shaded rectangles
    for (i in seq_along(shade_interval)) {
      plt <- plotly::add_trace(
        plt,
        x = c(
          shade_x[shade_interval[[i]][1]],
          shade_x[shade_interval[[i]][2]],
          shade_x[shade_interval[[i]][2]],
          shade_x[shade_interval[[i]][1]]
        ),
        y = c(0, 0, ymax, ymax),
        fill = "toself",
        fillcolor = shade_col,
        line = list(color = "transparent"),
        yaxis = "y",
        xaxis = "x",
        name = shade_name,
        legendgroup = if (shade_showlegend) shade_name else NULL,
        showlegend = shade_showlegend && i == 1
      )
    }
  } # /shade.interval

  for (i in seq_along(y)) {
    plt <- plotly::add_trace(
      plt,
      x = x[[i]],
      y = y[[i]],
      line = list(color = palette_y[[i]], width = yline_width),
      fill = yfill[[i]],
      fillcolor = plotly::toRGB(palette_y[[i]], alpha = fill_alpha),
      name = ynames[[i]],
      legendgroup = if (!is.null(y2)) "legend_y" else NULL
    )
  } # /y scatter

  if (!is.null(y2)) {
    for (i in seq_along(y2)) {
      plt <- plotly::add_trace(
        plt,
        x = x2[[i]],
        y = y2[[i]],
        line = list(color = palette_y2[[i]], width = y2line_width),
        fill = y2fill[[i]],
        fillcolor = plotly::toRGB(palette_y2[[i]], alpha = fill_alpha),
        name = y2names[[i]],
        legendgroup = "legend_y2",
        yaxis = "y2"
      )
    }
  } # /y2 scatter

  # Labels ----
  if (is.null(xlab)) {
    xlab <- .xname
  }
  if (!is.null(xunits)) {
    xlab <- paste0(xlab, " (", xunits, ")")
  }

  if (!is.null(yunits)) {
    ylab <- if (is.null(ylab)) {
      if (length(y) == 1) {
        ynames
      } else {
        yunits
      }
    } else {
      paste(ylab, yunits)
    }
  }

  if (!is.null(y2units)) {
    y2lab <- if (is.null(y2lab)) {
      if (length(y2) == 1) {
        y2names
      } else {
        y2units
      }
    } else {
      paste(y2lab, y2units)
    }
  }

  # Layout ----
  plt <- plotly::layout(
    plt,
    xaxis = list(
      title = list(
        text = xlab,
        standoff = x_standoff,
        font = f
      ),
      nticks = x_nticks,
      showspikes = x_showspikes,
      spikedash = spike_dash,
      spikecolor = spike_col,
      spikethickness = x_spike_thickness,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickmode = x_tickmode,
      tickvals = x_tickvals,
      ticktext = x_ticktext,
      tickangle = x_tickangle,
      tickcolor = tick_col,
      tickfont = tick_font,
      zeroline = theme[["zerolines"]],
      zerolinecolor = zero_col,
      zerolinewidth = theme[["zerolines_lwd"]]
    ), # /layout > xaxis
    yaxis = list(
      title = list(
        text = ylab,
        standoff = y_standoff,
        font = f
      ),
      nticks = y_nticks,
      showgrid = theme[["grid"]],
      gridcolor = grid_col,
      gridwidth = theme[["grid_lwd"]],
      tickcolor = tick_col,
      tickfont = tick_font,
      zeroline = theme[["zerolines"]],
      zerolinecolor = zero_col,
      zerolinewidth = theme[["zerolines_lwd"]],
      standoff = y_standoff
    ), # /layout > yaxis
    title = list(
      text = main,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = main_col
      ),
      xref = "paper",
      x = theme[["main_adj"]],
      yref = "paper",
      y = main_y,
      yanchor = main_yanchor
    ),
    legend = list(
      x = legend_x,
      y = legend_y,
      xanchor = legend_xanchor,
      yanchor = legend_yanchor,
      font = list(
        family = theme[["font_family"]],
        size = font_size,
        color = legend_col
      ),
      orientation = legend_orientation,
      bgcolor = "#ffffff00"
    ), # /layout > legend
    paper_bgcolor = bg,
    plot_bgcolor = plot_bg,
    margin = margin,
    hovermode = hovermode
  ) # /layout

  if (!is.null(y2)) {
    plt <- plt |>
      plotly::layout(
        yaxis2 = list(
          overlaying = "y",
          side = "right",
          title = list(
            text = y2lab,
            standoff = y2_standoff,
            font = f
          ),
          tickfont = tick_font
        )
      )
  } # /yaxis2 layout

  # Config ----
  plt <- plotly::config(
    plt,
    displaylogo = FALSE,
    displayModeBar = displayModeBar,
    toImageButtonOptions = list(
      format = modeBar_file_format,
      width = file_width,
      height = file_height
    ),
    scrollZoom = scrollZoom
  ) # /config

  # Write to file ----
  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  } # /export_plotly

  # Rangeslider ----
  if (show_rangeslider) {
    if (is.null(slider_start)) {
      slider_start <- x[[1]][1]
    }
    if (is.null(slider_end)) {
      idi <- min(500, length(x[[1]]))
      slider_end <- x[[1]][idi]
    }
    plt <- plt |>
      plotly::rangeslider(start = slider_start, end = slider_end)
  } # /rangeslider

  return(plt)
} # /rtemis::draw_xt

# tickmode = "array", tickvals: placement, ticktext: labels


================================================
FILE: R/fmt.R
================================================
# fmt.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% fmt ----
#' Text formatting
#'
#' Formats text with specified color, styles, and background using ANSI escape codes or HTML, with support for plain text output.
#'
#' @param x Character: Text to format.
#' @param col Optional Character: Color using hex code or name. If NULL, no color is applied.
#' @param bold Logical: If TRUE, make text bold.
#' @param italic Logical: If TRUE, make text italic.
#' @param underline Logical: If TRUE, underline text.
#' @param thin Logical: If TRUE, make text thin/light.
#' @param muted Logical: If TRUE, make text muted/dimmed.
#' @param bg Optional Character: Background color using hex code or name. If NULL, no background
#' color is applied.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with specified styling.
#'
#' @details
#' This function combines multiple formatting options into a single call,
#' making it more efficient than nested function calls. It generates
#' optimized ANSI escape sequences and clean HTML output.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' # Simple color
#' fmt("Hello", col = "red")
#'
#' # Bold red text
#' fmt("Error", col = "red", bold = TRUE)
#'
#' # Multiple styles
#' fmt("Warning", col = "yellow", bold = TRUE, italic = TRUE)
#'
#' # With background
#' fmt("Highlight", col = "white", bg = "blue", bold = TRUE)
fmt <- function(
  x,
  col = NULL,
  bold = FALSE,
  italic = FALSE,
  underline = FALSE,
  thin = FALSE,
  muted = FALSE,
  bg = NULL,
  pad = 0L,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  out <- switch(
    output_type,
    "ansi" = {
      codes <- character()

      # Style codes
      if (bold) {
        codes <- c(codes, "1")
      } else {
        # Explicitly set normal weight to override message() bold default
        codes <- c(codes, "22")
      }
      if (thin || muted) {
        codes <- c(codes, "2")
      } # Both use dim/faint
      if (italic) {
        codes <- c(codes, "3")
      }
      if (underline) {
        codes <- c(codes, "4")
      }

      # Foreground color
      if (!is.null(col)) {
        tryCatch(
          {
            col_rgb <- col2rgb(col)
            codes <- c(
              codes,
              paste0("38;2;", col_rgb[1], ";", col_rgb[2], ";", col_rgb[3])
            )
          },
          error = function(e) {
            warning("Invalid color '", col, "', ignoring color")
          }
        )
      }

      # Background color
      if (!is.null(bg)) {
        tryCatch(
          {
            bg_rgb <- col2rgb(bg)
            codes <- c(
              codes,
              paste0("48;2;", bg_rgb[1], ";", bg_rgb[2], ";", bg_rgb[3])
            )
          },
          error = function(e) {
            warning("Invalid background color '", bg, "', ignoring background")
          }
        )
      }

      # Generate ANSI sequence
      if (length(codes) > 0) {
        paste0("\033[", paste(codes, collapse = ";"), "m", x, "\033[0m")
      } else {
        x
      }
    },
    "html" = {
      styles <- character()

      # Colors
      if (!is.null(col)) {
        styles <- c(styles, paste0("color: ", col))
      }
      if (!is.null(bg)) {
        styles <- c(styles, paste0("background-color: ", bg))
      }

      # Styles
      if (bold) {
        styles <- c(styles, "font-weight: bold")
      }
      if (thin) {
        styles <- c(styles, "font-weight: lighter")
      }
      if (muted) {
        styles <- c(styles, "color: gray")
      } # Override color for muted
      if (italic) {
        styles <- c(styles, "font-style: italic")
      }
      if (underline) {
        styles <- c(styles, "text-decoration: underline")
      }

      # Generate HTML span
      if (length(styles) > 0) {
        paste0(
          '<span style="',
          paste(styles, collapse = "; "),
          '">',
          x,
          "</span>"
        )
      } else {
        x
      }
    },
    "plain" = x
  ) # /switch
  if (pad > 0L) {
    out <- paste0(strrep(" ", pad), out)
  }
  out
} # /rtemis::fmt


# %% highlight ----
#' Highlight text
#'
#' A `fmt()` convenience wrapper for highlighting text.
#'
#' @param x Character: Text to highlight.
#' @param pad Integer: Number of spaces to pad before text.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with highlight.
#'
#' @author EDG
#' @keywords internal
#' @noRd
highlight <- function(
  x,
  pad = 0L,
  output_type = c("ansi", "html", "plain")
) {
  fmt(x, col = highlight_col, bold = TRUE, pad = pad, output_type = output_type)
} # /rtemis::highlight


# %% highlight2 ----
highlight2 <- function(
  x,
  output_type = c("ansi", "html", "plain")
) {
  fmt(x, col = highlight2_col, bold = FALSE, output_type = output_type)
} # /rtemis::highlight2


# %% bold ----
#' Make text bold
#'
#' A `fmt()` convenience wrapper for making text bold.
#'
#' @param text Character: Text to make bold
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with bold styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
bold <- function(text, output_type = c("ansi", "html", "plain")) {
  fmt(text, bold = TRUE, output_type = output_type)
} # /rtemis::bold


# %% italic ----
#' Make text italic
#'
#' A `fmt()` convenience wrapper for making text italic.
#'
#' @param text Character: Text to make italic
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with italic styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
italic <- function(text, output_type = c("ansi", "html", "plain")) {
  fmt(text, italic = TRUE, output_type = output_type)
} # /rtemis::italic


# %% underline ----
#' Make text underlined
#'
#' A `fmt()` convenience wrapper for making text underlined.
#'
#' @param text Character: Text to underline
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with underline styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
underline <- function(text, output_type = c("ansi", "html", "plain")) {
  fmt(text, underline = TRUE, output_type = output_type)
} # /rtemis::underline


# %% thin ----
#' Make text thin/light
#'
#' A `fmt()` convenience wrapper for making text thin/light.
#'
#' @param text Character: Text to make thin
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with thin/light styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
thin <- function(text, output_type = c("ansi", "html", "plain")) {
  fmt(text, thin = TRUE, output_type = output_type)
} # /rtemis::thin


# %% muted ----
#' Muted text
#'
#' A `fmt()` convenience wrapper for making text muted.
#'
#' @param x Character: Text to format
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with muted styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
muted <- function(x, output_type = c("ansi", "html", "plain")) {
  fmt(x, muted = TRUE, output_type = output_type)
} # /rtemis::muted


# %% gray ----
#' Gray text
#'
#' A `fmt()` convenience wrapper for making text gray.
#'
#' @param x Character: Text to format
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with gray styling
#'
#' @details
#' Can be useful in contexts where muted is not supported.
#'
#' @author EDG
#' @keywords internal
#' @noRd
gray <- function(x, output_type = c("ansi", "html", "plain")) {
  fmt(x, col = "#808080", output_type = output_type)
} # /rtemis::gray


# %% col256 ----
#' Apply 256-color formatting
#'
#' @param text Character: Text to color
#' @param col Character or numeric: Color (ANSI 256-color code, hex for HTML)
#' @param bg Logical: If TRUE, apply as background color
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with 256-color styling
#'
#' @author EDG
#' @keywords internal
#' @noRd
col256 <- function(
  text,
  col = "79",
  bg = FALSE,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  switch(
    output_type,
    "ansi" = {
      if (bg) {
        paste0("\033[48;5;", col, "m", text, "\033[0m")
      } else {
        paste0("\033[38;5;", col, "m", text, "\033[0m")
      }
    },
    "html" = {
      # Convert ANSI color codes to hex colors if needed
      hex_col <- if (
        is.numeric(col) || (is.character(col) && !grepl("^#", col))
      ) {
        ansi256_to_hex(col)
      } else {
        col
      }
      if (bg) {
        paste0(
          '<span style="background-color: ',
          hex_col,
          '">',
          text,
          "</span>"
        )
      } else {
        paste0('<span style="color: ', hex_col, '">', text, "</span>")
      }
    },
    "plain" = text
  )
} # /rtemis::col256


# %% ansi256_to_hex ----
#' Convert ANSI 256 color code to HEX
#'
#' @param code Integer: ANSI 256 color code (0-255).
#'
#' @return Character: HEX color string.
#'
#' @author EDG
#' @keywords internal
#' @noRd
ansi256_to_hex <- function(code) {
  code <- as.integer(code)
  if (is.na(code) || code < 0 || code > 255) {
    return("#000000") # Return black for invalid codes
  }

  # Standard and high-intensity colors (0-15)
  if (code < 16) {
    return(c(
      "#000000",
      "#cd0000",
      "#00cd00",
      "#cdcd00",
      "#0000ee",
      "#cd00cd",
      "#00cdcd",
      "#e5e5e5",
      "#7f7f7f",
      "#ff0000",
      "#00ff00",
      "#ffff00",
      "#5c5cff",
      "#ff00ff",
      "#00ffff",
      "#ffffff"
    )[code + 1])
  }

  # 6x6x6 color cube (16-231)
  if (code >= 16 && code <= 231) {
    code <- code - 16
    r <- floor(code / 36)
    g <- floor((code %% 36) / 6)
    b <- code %% 6
    levels <- c(0, 95, 135, 175, 215, 255) # xterm levels
    return(grDevices::rgb(
      levels[r + 1],
      levels[g + 1],
      levels[b + 1],
      maxColorValue = 255
    ))
  }

  # Grayscale ramp (232-255)
  gray_level <- (code - 232) * 10 + 8
  grDevices::rgb(
    gray_level,
    gray_level,
    gray_level,
    maxColorValue = 255
  )
} # /rtemis::ansi256_to_hex


# %% fmt_gradient ----
#' Gradient text
#'
#' @param x Character: Text to colorize.
#' @param colors Character vector: Colors to use for the gradient.
#' @param bold Logical: If TRUE, make text bold.
#' @param space Character {"rgb", "Lab"}: Color space for gradient interpolation.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Text with gradient color applied.
#'
#' @author EDG
#' @keywords internal
#' @noRd
fmt_gradient <- function(
  x,
  colors,
  bold = FALSE,
  space = "Lab",
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  if (output_type == "plain") {
    return(x)
  }

  # Split text into individual characters
  chars <- strsplit(x, "")[[1]]
  n_chars <- length(chars)

  if (n_chars <= 1) {
    # For single character or empty string, use first color
    return(fmt(x, col = colors[1], output_type = output_type))
  }

  # Generate gradient colors using colorRampPalette
  tryCatch(
    {
      gradient_colors <- grDevices::colorRampPalette(colors, space = space)(
        n_chars
      )
    },
    error = function(e) {
      warning("Invalid gradient colors, using default")
      x
    }
  )

  # Apply gradient colors to each character
  gradient_chars <- character(n_chars)
  for (i in seq_len(n_chars)) {
    gradient_chars[i] <- fmt(
      chars[i],
      col = gradient_colors[i],
      bold = bold,
      output_type = output_type
    )
  }

  # Combine all colored characters
  paste(gradient_chars, collapse = "")
} # /rtemis::fmt_gradient


# %% map_value_to_color ----
#' Map numeric value to color
#'
#' Maps a numeric value to a color based on a specified range and color palette using `fmt`
#' for formatting. Useful for visualizing numeric values in text output.
#'
#' @param x Numeric: Value to map to a color.
#' @param range Numeric vector of length 2: Minimum and maximum values for mapping.
#' @param colors Character vector: Colors to use for the gradient mapping.
#' @param space Character {"rgb", "Lab"}: Color space for gradient interpolation.
#' @param bold Logical: If TRUE, make text bold.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted text with color corresponding to the numeric value.
#'
#' @author EDG
#' @keywords internal
#' @noRd
map_value_to_color <- function(
  x,
  range = c(0, 1),
  colors = c("#ff9f20", "#00b2b2"),
  space = "Lab",
  bold = TRUE,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)
  if (output_type == "plain") {
    return(as.character(x))
  }
  if (!is.numeric(x) || length(x) != 1L || is.na(x)) {
    cli::cli_abort("`x` must be a single non-missing numeric value.")
  }
  if (!is.numeric(range) || length(range) != 2L || anyNA(range)) {
    cli::cli_abort(
      "`range` must be a numeric vector of length 2 with no missing values."
    )
  }
  if (range[1] >= range[2]) {
    cli::cli_abort("`range[1]` must be strictly less than `range[2]`.")
  }
  if (!is.character(colors) || length(colors) < 2L || anyNA(colors)) {
    cli::cli_abort(
      "`colors` must be a character vector of at least 2 non-missing colors."
    )
  }
  # Check x is within range
  if (x < range[1] || x > range[2]) {
    cli::cli_abort(
      "Value {x} is out of range [{range[1]}, {range[2]}]"
    )
  }

  n_colors <- 256L
  gradient <- tryCatch(
    {
      grDevices::colorRampPalette(colors, space = space)(n_colors)
    },
    error = function(e) {
      cli::cli_abort("Invalid `colors` specification.")
    }
  )

  p <- (x - range[1]) / (range[2] - range[1])
  idx <- as.integer(round(p * (n_colors - 1L))) + 1L
  idx <- max(1L, min(n_colors, idx))

  fmt(
    as.character(x),
    col = gradient[idx],
    bold = bold,
    output_type = output_type
  )
} # /rtemis::map_value_to_color


================================================
FILE: R/ifw.R
================================================
# ifw.R
# ::rtemis::
# 2025- EDG rtemis.org

#' Inverse Frequency Weighting
#'
#' @param y Vector: Outcome
#' @param type Character: "case_weights" or "class_weights". What to return.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Numeric vector of weights.
#'
#' @keywords internal
#' @noRd
#' @author EDG
#'
#' @examples
#' y <- factor(sample(c("A", "B"), size = 100, replace = TRUE, prob = c(.1, .9)))
#' ifw(y)
#' ifw(y, type = "class_weights")
ifw <- function(y, type = c("case_weights", "class_weights"), verbosity = 1L) {
  stopifnot(is.factor(y))
  type <- match.arg(type)
  if (verbosity > 0L) {
    msg(
      "Calculating",
      sub("_", " ", type),
      "using Inverse Frequency Weighting."
    )
  }

  # Class weights ----
  inverse_proportions <- 1 / (table(y) / NROW(y))
  class_weights <- structure(
    as.numeric(inverse_proportions / min(inverse_proportions)),
    names = names(inverse_proportions)
  )

  if (type == "class_weights") {
    out <- class_weights
    stopifnot(length(out) == nlevels(y))
  } else {
    out <- class_weights[as.integer(y)]
    stopifnot(length(out) == length(y))
  }
  out
} # /rtemis::ifw


================================================
FILE: R/massGLM.R
================================================
# massGLM.R
# ::rtemis::
# 2021- EDG rtemis.org

#' Mass-univariate GLM Analysis
#'
#' @param x tabular data: Predictor variables. Usually a small number of covariates.
#' @param y data.frame or similar: Each column is a different outcome. The function will train one
#' GLM for each column of `y`. Usually a large number of features.
#' @param scale_y Logical: If TRUE, scale each column of `y` to have mean 0 and sd 1. If `NULL`,
#' defaults to TRUE if `y` is numeric, FALSE otherwise.
#' @param center_y Logical: If TRUE, center each column of `y` to have mean 0. If `NULL`, defaults
#' to TRUE if `scale_y` is TRUE, FALSE otherwise.
# @param include_anova Logical: If TRUE, include ANOVA results in the summary.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `MassGLM` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' set.seed(2022)
#' y <- rnormmat(500, 40, return_df = TRUE)
#' x <- data.frame(
#'   x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500),
#'   x2 = y[[21]] + rnorm(500)
#' )
#' massmod <- massGLM(x, y)
#' # Print table of coefficients, p-values, etc. for all models
#' summary(massmod)
massGLM <- function(
  x,
  y,
  scale_y = NULL,
  center_y = NULL,
  # include_anova = TRUE,
  verbosity = 1L
) {
  # Init ----
  start_time <- intro(verbosity = verbosity)

  # Check y ----
  # all y columns must be numeric or all factors with 2 levels
  y_class <- sapply(y, class)
  if (y_class[1] == "numeric") {
    # Check all are numeric
    if (!all(y_class == "numeric")) {
      cli::cli_abort(
        "All columns of y must be the same type: either numeric or factors with 2 levels"
      )
    }
    .family <- "gaussian"
  } else if (y_class[1] == "factor") {
    n_levels <- sapply(y, nlevels)
    if (!all(n_levels == 2)) {
      cli::cli_abort("All factor columns of y must have 2 levels")
    }
    .family <- "binomial"
  } else {
    cli::cli_abort(
      "All columns of y must be either numeric or factors with 2 levels. Found: {.val {y_class}}"
    )
  }

  # Preprocessing ----
  if (is.null(scale_y)) {
    scale_y <- if (y_class[1] == "numeric") {
      TRUE
    } else {
      FALSE
    }
  }
  if (is.null(center_y)) {
    center_y <- if (scale_y) {
      TRUE
    } else {
      FALSE
    }
  }
  if (scale_y || center_y) {
    y <- preprocess(
      y,
      config = setup_Preprocessor(scale = scale_y, center = center_y),
      verbosity = verbosity
    )[["preprocessed"]]
  }

  # Data ----
  xnames <- colnames(x)
  ynames <- colnames(y)
  dat <- data.table(x, y)

  # fit1: Loop function ----
  fit1 <- function(index, dat, family, ynames) {
    formula1 <- as.formula(paste(
      ynames[index],
      "~",
      paste(xnames, collapse = " + ")
    ))
    mod1 <- glm(formula1, family = family, data = dat)
    glm2table(list(mod1), xnames = ynames[index], include_anova = NA)
  }

  # Fit models ----
  if (verbosity > 0L) {
    msg(
      "Fitting",
      highlight(length(ynames)),
      "GLMs of family",
      bold(.family),
      "with",
      highlight(length(xnames)),
      ngettext(length(xnames), "predictor", "predictors"),
      "each..."
    )
  }
  tbls <- lapply(
    cli::cli_progress_along(seq_along(y), name = "GLMs", type = "tasks"),
    function(i) {
      fit1(index = i, dat = dat, family = .family, ynames = ynames)
    }
  )
  tbl <- rbindlist(tbls)

  # MassGLM ----
  # ynames should be the same as tbl[["Variable"]]
  # <> Check in MassGLM constructor
  if (!all(ynames == tbl[["Variable"]])) {
    cli::cli_warn(c(
      "The names of the outcome variables in y ({.val ynames}) do not match the names in the summary table ({.val summary[['Variable']]})",
      "Check the summary table."
    ))
  }
  outro(start_time)
  MassGLM(
    summary = tbl,
    ynames = ynames,
    xnames = xnames,
    coefnames = gsub("Coefficient_", "", getnames(tbl, "Coefficient")),
    family = .family
  )
} # /rtemis::massGLM


================================================
FILE: R/metrics.R
================================================
# metrics.R
# ::rtemis::
# 2019- EDG rtemis.org

#' Error functions
#'
#' Convenience functions for calculating loss.
#' These can be passed as arguments to learners that support custom loss functions.
#'
#' @rdname error
#' @param x Vector of True values
#' @param y Vector of predicted values
#' @param na.rm Logical: If TRUE, remove NA values before computation.
#'
#' @author EDG
#' @keywords internal
#' @noRd
mae <- function(x, y, na.rm = TRUE) {
  error <- x - y
  mean(abs(error), na.rm = na.rm)
} # /rtemis::mae

#' @rdname error
#' @keywords internal
#' @noRd
mse <- function(x, y, na.rm = TRUE) {
  error <- x - y
  mean(error^2, na.rm = na.rm)
} # /rtemis::mse

#' Weighted MSE
#'
#' @rdname error
#' @keywords internal
#' @noRd
msew <- function(x, y, weights = rep(1, length(y)), na.rm = TRUE) {
  error <- x - y
  error <- error * weights
  mean(error^2, na.rm = na.rm)
} # /rtemis::msew

#' @rdname error
#' @keywords internal
#' @noRd
rmse <- function(x, y, na.rm = TRUE) {
  sqrt(mse(x, y, na.rm = na.rm))
} # /rtemis::rmse

#' R-squared
#'
#' @param x Float, vector: True values
#' @param y Float, vector: predicted values
#' @author EDG
#' @keywords internal
#' @noRd
rsq <- function(x, y) {
  SSE <- sum((x - y)^2)
  # Sum of Squares due to Regression (SSR) a.k.a. Explained Sum of Squares (ESS)
  # SSR <- sum((mean(x) - y)^2)
  # Total Sum of Squares (TSS or SST)
  SST <- sum((x - mean(x))^2)
  # R-squared a.k.a. Coefficient of Determination i.e. percent variance explained
  1 - (SSE / SST)
} # /rtemis::rsq


#' Log Loss for a binary classifier
#'
#' @param true_int Integer vector, {0, 1}: True labels (1 is the positive class).
#' @param predicted_prob Float, vector: predicted probabilities.
#' @param eps Float: Small value to prevent log(0).
#'
#' @author EDG
#' @keywords internal
#' @noRd
logloss <- function(true_int, predicted_prob, eps = 1e-16) {
  predicted_prob <- pmax(pmin(predicted_prob, 1 - eps), eps)
  -mean(
    true_int * log(predicted_prob) + (1 - true_int) * log(1 - predicted_prob)
  )
} # /rtemis::logloss


#' Sensitivity
#'
#' The first factor level is considered the positive case.
#'
#' @param true Factor: True labels.
#' @param predicted Factor: Predicted labels.
#' @param harmonize Logical: If TRUE, run `factor_harmonize` first.
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#' @keywords internal
#' @noRd
sensitivity <- function(true, predicted, harmonize = FALSE, verbosity = 1L) {
  if (harmonize) {
    predicted <- factor_harmonize(true, predicted, verbosity = verbosity)
  }
  pos_index <- true == levels(true)[1]
  condition_pos <- sum(pos_index)
  true_pos <- sum(true[pos_index] == predicted[pos_index])
  true_pos / condition_pos
}


#' Specificity
#'
#' The first factor level is considered the positive case.
#'
#' @param true True labels
#' @param predicted predicted labels
#' @param harmonize Logical: If TRUE, run `factor_harmonize` first
#' @param verbosity Integer: Verbosity level.
#'
#' @keywords internal
#' @noRd
specificity <- function(true, predicted, harmonize = FALSE, verbosity = 1L) {
  if (harmonize) {
    predicted <- factor_harmonize(true, predicted, verbosity = verbosity)
  }
  neg_index <- true == levels(true)[2]
  condition_neg <- sum(neg_index)
  true_neg <- sum(true[neg_index] == predicted[neg_index])
  true_neg / condition_neg
}

#' Balanced Accuracy
#'
#' Balanced Accuracy of a binary classifier
#'
#' BAcc = .5 * (Sensitivity + Specificity)
#'
#' @param true Factor: True labels.
#' @param predicted Factor: Predicted labels.
#' @param harmonize Logical: passed to `sensitivity()` and `specificity`, which use `factor_harmonize`.
#' @param verbosity Integer: Verbosity level.
#'
#' @keywords internal
#' @noRd
bacc <- function(true, predicted, harmonize = FALSE, verbosity = 1L) {
  0.5 *
    (sensitivity(
      true,
      predicted,
      harmonize = harmonize,
      verbosity = verbosity
    ) +
      specificity(
        true,
        predicted,
        harmonize = harmonize,
        verbosity = verbosity
      ))
} # /rtemis::bacc

#' Precision (aka PPV)
#'
#' The first factor level is considered the positive case.
#'
#' @param true Factor: True labels
#' @param predicted Factor: predicted labels
#' @param harmonize Logical: If TRUE, run `factor_harmonize` first
#' @param verbosity Integer: Verbosity level.
#'
#' @keywords internal
#' @noRd
precision <- function(true, predicted, harmonize = FALSE, verbosity = 1L) {
  if (harmonize) {
    predicted <- factor_harmonize(true, predicted, verbosity = verbosity)
  }
  tbl <- table(predicted, true)
  predicted_totals <- rowSums(tbl)[1]
  hits <- diag(tbl)[1]

  if (hits == 0 && predicted_totals == 0) {
    1
  } else {
    hits / predicted_totals
  }
} # /rtemis::precision

#' Factor harmonize
#'
#' @param reference Reference factor.
#' @param x Input factor.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Factor: x with levels in the same order as reference.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
factor_harmonize <- function(reference, x, verbosity = 1L) {
  if (!is.factor(x) || !is.factor(reference)) {
    cli::cli_abort("Inputs must be factors")
  }
  if (!all(levels(x) == levels(reference))) {
    if (!all(levels(x) %in% levels(reference))) {
      if (verbosity > 0L) {
        msg("Levels of x:")
      }
      levels(x)
      if (verbosity > 0L) {
        msg("levels of reference:")
      }
      levels(reference)
      cli::cli_abort("Levels of two inputs do not match")
    }
    if (verbosity > 0L) {
      msg("Input factor levels are not in the same order, correcting")
    }
    x <- factor(x, levels = levels(reference))
  }
  x
} # /rtemis::factor_harmonize


#' F1 score
#'
#' Calculate the F1 score for classification:
#'
#' \deqn{F1 = 2 \frac{Recall \cdot Precision}{Recall + Precision}}{F1 = 2 * (Recall * Precision)/(Recall + Precision)}
#'
#' @param recall Float \[0, 1\]: Recall a.k.a. Sensitivity
#' @param precision Float \[0, 1\]: Precision a.k.a. Positive Predictive Value
#'
#' @author EDG
#' @keywords internal
#' @noRd
f1 <- function(precision, recall) {
  2 * (recall * precision) / (recall + precision)
} # /rtemis::f1


# auc.R
# ::rtemis::
# 2019-23 EDG rtemis.org

#' Area under the ROC Curve
#'
#' Get the Area under the ROC curve to assess classifier performance.
#'
#' @param true_int Integer vector: True labels of outcomes (e.g. c(0, 1, 1))
#' @param predicted_prob Numeric Vector: Probabilities or model scores
#' (e.g. c(.32, .75, .63), etc)
#' @param method Character {"lightAUC" "ROCR"}: Package to use.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Numeric.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' preds <- c(0.7, 0.55, 0.45, 0.25, 0.6, 0.7, 0.2)
#' labels <- 2L - as.integer(factor(c("a", "a", "a", "b", "b", "b", "b")))
#' auc(labels, preds, method = "lightAUC")
#' auc(labels, preds, method = "ROCR")
auc <- function(
  true_int,
  predicted_prob,
  method = c("lightAUC", "ROCR"),
  verbosity = 0L
) {
  # Checks ----
  method <- match.arg(method)
  check_inherits(true_int, "integer")
  check_float01inc(predicted_prob)
  # method <- match.arg(method)
  if (length(unique(true_int)) == 1) {
    return(NaN)
  }
  if (method == "lightAUC") {
    check_dependencies("lightAUC")
    auc. <- lightAUC::lightAUC(probs = predicted_prob, actuals = true_int)
  } else if (method == "ROCR") {
    check_dependencies("ROCR")
    .pred <- try(ROCR::prediction(
      predicted_prob,
      true_int,
      label.ordering = NULL
    ))
    auc. <- try(ROCR::performance(.pred, "auc")@y.values[[1]])
  }

  if (inherits(auc., "try-error")) {
    auc. <- NaN
  }

  if (verbosity > 0L) {
    msg("AUC =", auc.)
  }
  auc.
} # /rtemis::auc


#' Area under the Curve by pairwise concordance
#'
#' Get the Area under the ROC curve to assess classifier performance using pairwise concordance
#'
#' The first level of `true.labels` must be the positive class, and high numbers in
#' `estimated.score` should correspond to the positive class.
#'
#' @param estimated.score Float, Vector: Probabilities or model scores
#' (e.g. c(.32, .75, .63), etc)
#' @param true.labels True labels of outcomes (e.g. c(0, 1, 1))
#' @param verbosity Integer: Verbosity level.
#'
#' @examples
#' true.labels <- factor(c("a", "a", "a", "b", "b", "b", "b"))
#' estimated.score <- c(0.7, 0.55, 0.45, 0.25, 0.6, 0.7, 0.2)
#' auc_pairs(estimated.score, true.labels, verbosity = 1L)
#'
#' @keywords internal
#' @noRd
auc_pairs <- function(estimated.score, true.labels, verbosity = 1L) {
  true.labels <- as.factor(true.labels)
  true.levels <- levels(true.labels)
  n.levels <- length(true.levels)
  if (n.levels == 2) {
    outer.diff <- outer(
      estimated.score[true.labels == true.levels[1]],
      estimated.score[true.labels == true.levels[2]],
      "-"
    )
    .auc <- mean((outer.diff > 0) + .5 * (outer.diff == 0))
  } else {
    cli::cli_abort(
      "Multiclass AUC does not have a unique definition and is not yet implemented"
    )
  }
  if (verbosity > 0L) {
    msg("Positive class:", true.levels[1])
    msg("AUC =", .auc)
  }
  invisible(.auc)
} # /rtemis::auc_pairs


#' Brier_Score
#'
#' Calculate the Brier_Score for classification:
#'
#' \deqn{BS = \frac{1}{N} \sum_{i=1}^{N} (y_i - p_i)^2}{BS = 1/N * sum_{i=1}^{N} (y_i - p_i)^2}
#'
#' @param true_int Integer vector, {0, 1}: True labels
#' @param predicted_prob Numeric vector, \[0, 1\]: predicted probabilities
#'
#' @author EDG
#' @keywords internal
#' @noRd
brier_score <- function(true_int, predicted_prob) {
  true_int <- clean_int(true_int)
  check_float01inc(predicted_prob)
  mean((true_int - predicted_prob)^2)
} # /rtemis::brier_score

#' Convert labels to integers
#'
#' Convert factor labels to integers where the positive class is 1 and the negative class is 0.
#'
#' @param x Factor: True labels.
#' @param binclasspos Integer: Position of the factor level which is the positive class (binary classification only).
#'
#' @return Integer vector: 0, 1 where 1 is the positive class as defined by binclasspos.
#'
#' @author EDG
#' @keywords internal
#' @noRd
labels2int <- function(x, binclasspos = 2L) {
  stopifnot(is.factor(x))
  # Convert factor to 0, 1 where 1 is the positive class as defined by binclasspos
  if (binclasspos == 1L) {
    xi <- 2L - as.integer(x)
  } else {
    xi <- as.integer(x) - 1L
  }
  xi
} # /rtemis::labels2int

# classification_metrics() ----
#' Classification Metrics
#'
#' @details
#' Note that auc_method = "pROC" is the only one that will output an AUC even if
#' one or more predicted probabilities are NA.
#'
#' @param true_labels Factor: True labels.
#' @param predicted_labels Factor: predicted values.
#' @param predicted_prob Numeric vector: predicted probabilities.
#' @param binclasspos Integer: Factor level position of the positive class in binary classification.
#' @param calc_auc Logical: If TRUE, calculate AUC. May be slow in very large datasets.
#' @param calc_brier Logical: If TRUE, calculate Brier_Score.
#' @param auc_method Character: "lightAUC", "pROC", "ROCR".
#' @param sample Character: Sample name.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `ClassificationMetrics` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # Assume positive class is "b"
#' true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
#' predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a"))
#' predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)
#'
#' classification_metrics(true_labels, predicted_labels, predicted_prob)
#' classification_metrics(true_labels, predicted_labels, 1 - predicted_prob, binclasspos = 1L)
classification_metrics <- function(
  true_labels,
  predicted_labels,
  predicted_prob = NULL,
  binclasspos = 2L,
  calc_auc = TRUE,
  calc_brier = TRUE,
  auc_method = "lightAUC",
  sample = character(),
  verbosity = 0L
) {
  # Checks ----
  # Binary class probabilities only for now
  if (length(predicted_prob) > length(true_labels)) {
    predicted_prob <- NULL
  }
  n_classes <- nlevels(true_labels)

  # Check same levels in
  if (!all(levels(true_labels) == levels(predicted_labels))) {
    cli::cli_abort(
      "True and predicted labels must have the same levels, in the same order.",
      "\n     levels(true_labels): ",
      paste(levels(true_labels), collapse = ", "),
      "\nlevels(predicted_labels): ",
      paste(levels(predicted_labels), collapse = ", ")
    )
  }

  # Positive class ----
  # For confusion table, make positive class the first factor level
  if (n_classes == 2 && binclasspos == 2L) {
    true_labels <- factor(true_labels, levels = rev(levels(true_labels)))
    predicted_labels <- factor(
      predicted_labels,
      levels = rev(levels(predicted_labels))
    )
  }
  true_levels <- levels(true_labels)

  # Levels already set so that the first level is the positive class
  Positive_Class <- if (n_classes == 2) true_levels[1] else NA
  if (verbosity > 0L) {
    if (n_classes == 2) {
      msg(
        "There are two outcome classes:",
        highlight(paste(rev(true_levels), collapse = ", "))
      )
      msg("        The positive class is:", highlight(Positive_Class))
    } else {
      msg(
        "There are",
        n_classes,
        "classes:",
        highlight(paste(rev(true_levels), collapse = ", "))
      )
    }
  }
  tbl <- table(true_labels, predicted_labels)
  # attr(tbl, "dimnames") <- list(Reference = true_levels, Predicted = true_levels)
  names(attributes(tbl)[["dimnames"]]) <- c("Reference", "Predicted")

  Class <- list()
  Overall <- list()
  Class[["Totals"]] <- rowSums(tbl)
  Class[["Predicted_totals"]] <- colSums(tbl)
  Total <- sum(tbl)
  Class[["Hits"]] <- diag(tbl)
  # Class[["Misses"]] <- Class[["Totals"]] - Class[["Hits"]]
  Class[["Sensitivity"]] <- Class[["Hits"]] / Class[["Totals"]]
  Class[["Condition_negative"]] <- Total - Class[["Totals"]]
  Class[["True_negative"]] <- Total -
    Class[["Predicted_totals"]] -
    (Class[["Totals"]] - Class[["Hits"]])
  Class[["Specificity"]] <- Class[["True_negative"]] /
    Class[["Condition_negative"]]
  Class[["Balanced_Accuracy"]] <- .5 *
    (Class[["Sensitivity"]] + Class[["Specificity"]])
  # PPV = true positive / predicted condition positive
  Class[["PPV"]] <- Class[["Hits"]] / Class[["Predicted_totals"]]
  # NPV  = true negative / predicted condition negative
  Class[["NPV"]] <- Class[["True_negative"]] /
    (Total - Class[["Predicted_totals"]])
  Class[["F1"]] <- 2 *
    (Class[["PPV"]] * Class[["Sensitivity"]]) /
    (Class[["PPV"]] + Class[["Sensitivity"]])

  # Binary vs Multiclass ----
  if (n_classes == 2) {
    Overall[["Sensitivity"]] <- Class[["Sensitivity"]][1]
    Overall[["Specificity"]] <- Class[["Specificity"]][1]
    Overall[["Balanced_Accuracy"]] <- Class[["Balanced_Accuracy"]][1]
    Overall[["PPV"]] <- Class[["PPV"]][1]
    Overall[["NPV"]] <- Class[["NPV"]][1]
    Overall[["F1"]] <- Class[["F1"]][1]
  } else {
    Overall[["Balanced_Accuracy"]] <- mean(Class[["Sensitivity"]])
    Overall[["F1"]] <- mean(Class[["F1"]])
  }
  Overall[["Accuracy"]] <- sum(Class[["Hits"]]) / Total

  # Probability-based metrics ----
  if (!is.null(predicted_prob) && n_classes == 2L) {
    # Positive class has been set to first level
    true_int <- 2L - as.integer(true_labels)
    if (calc_auc) {
      Overall[["AUC"]] <- auc(
        true_int = true_int,
        predicted_prob = predicted_prob,
        method = auc_method
      )
    }
    if (calc_brier) {
      Overall[["Brier_Score"]] <- brier_score(true_int, predicted_prob)
    }
    # Overall[["Log loss"]] <- logloss(true_int, predicted_prob)
  }

  # Outro ----
  Overall <- as.data.frame(do.call(cbind, Overall))
  rownames(Overall) <- "Overall"
  Class <- (data.frame(
    Sensitivity = Class[["Sensitivity"]],
    Specificity = Class[["Specificity"]],
    Balanced_Accuracy = Class[["Balanced_Accuracy"]],
    PPV = Class[["PPV"]],
    NPV = Class[["NPV"]],
    F1 = Class[["F1"]]
  ))

  ClassificationMetrics(
    sample = sample,
    Confusion_Matrix = tbl,
    Overall = Overall,
    Class = Class,
    Positive_Class = Positive_Class
  )
} # /rtemis::classification_metrics


# regression_metrics() ----
#' Regression Metrics
#'
#' @param true Numeric vector: True values.
#' @param predicted Numeric vector: Predicted values.
#' @param na.rm Logical: If TRUE, remove NA values before computation.
#' @param sample Character: Sample name (e.g. "training", "test").
#'
#' @return `RegressionMetrics` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' true <- rnorm(100)
#' predicted <- true + rnorm(100, sd = 0.5)
#' regression_metrics(true, predicted)
regression_metrics <- function(
  true,
  predicted,
  na.rm = TRUE,
  sample = NULL
) {
  RegressionMetrics(
    MAE = mae(true, predicted, na.rm = na.rm),
    MSE = mse(true, predicted, na.rm = na.rm),
    RMSE = rmse(true, predicted, na.rm = na.rm),
    Rsq = rsq(true, predicted),
    sample = sample
  )
} # /rtemis::regression_metrics


================================================
FILE: R/msg.R
================================================
# msg.R
# ::rtemis::
# 2016- EDG rtemis.org

#' Get current date and time
#'
#' @details
#' used by msgdatetime, log_to_file
#'
#' @param datetime_format Character: Format for the date and time.
#'
#' @return Character: Formatted date and time.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' datetime()
datetime <- function(datetime_format = "%Y-%m-%d %H:%M:%S") {
  format(Sys.time(), datetime_format)
}


#' Message datetime()
#'
#' @param datetime_format Character: Format for the date and time.
#'
#' @return Character: Formatted date and time.
#'
#' @author EDG
#' @keywords internal
#' @noRd
# Used by msg(), msg0(), msgstart()
msgdatetime <- function(datetime_format = "%Y-%m-%d %H:%M:%S") {
  message(gray(paste0(datetime(), " ")), appendLF = FALSE)
}


#' Info msg
#'
#' @author EDG
#' @keywords internal
#' @noRd
msg_info <- function(..., format_fn = highlight2, verbosity = 1L) {
  msg0(..., format_fn = format_fn, caller_id = 2, verbosity = verbosity)
}


#' Dispatch to the registered message sink, if any
#'
#' Internal helper used by `msg()`, `msg0()`, `msgstart()`, `msgdone()`.
#' Returns TRUE if a sink consumed the event (caller should skip the console
#' output path), FALSE if no sink is registered (caller should write to console
#' as usual).
#'
#' @param text Character: the formatted message text (no datetime prefix).
#' @param caller Character or NA: calling function name from `format_caller()`.
#' @param ts Character: formatted timestamp from `datetime()`.
#' @param level Character: one of `"info"`, `"start"`, `"done"`.
#'
#' @return Logical scalar.
#'
#' @author EDG
#' @keywords internal
#' @noRd
.msg_to_sink <- function(text, caller, ts, level) {
  sink <- live[["msg_sink"]]
  if (is.null(sink)) {
    return(FALSE)
  }
  sink(list(text = text, caller = caller, ts = ts, level = level))
  TRUE
}


#' Format caller
#'
#' @author EDG
#' @keywords internal
#' @noRd
format_caller <- function(call_stack, call_depth, caller_id, max_char = 30L) {
  stack_length <- length(call_stack)
  if (stack_length < 2) {
    caller <- NA
  } else {
    call_depth <- call_depth + caller_id
    if (call_depth > stack_length) {
      call_depth <- stack_length
    }
    caller <- paste(
      lapply(
        rev(seq(call_depth)[-seq(caller_id)]),
        function(i) rev(call_stack)[[i]][[1]]
      ),
      collapse = ">>"
    )
  }
  # do.call and similar will change the call stack, it will contain the full
  # function definition instead of the name alone
  # Capture S7 method calls
  if (!is.na(caller) && substr(caller, 1, 8) == "`method(") {
    caller <- sub("`method\\(([^,]+),.*\\)`", "\\1", caller)
  }
  if (is.function(caller)) {
    # Try to get function name from call stack context
    caller <- tryCatch(
      {
        # Get the original call stack element as character
        call_str <- deparse(rev(call_stack)[[rev(seq(call_depth)[
          -seq(caller_id)
        ])[1]]])
        # Extract function name from the call
        fn_match <- regexpr("^[a-zA-Z_][a-zA-Z0-9_\\.]*", call_str)
        if (fn_match > 0) {
          regmatches(call_str, fn_match)
        } else {
          "(fn)"
        }
      },
      error = function(e) "(fn)"
    )
  }
  if (is.character(caller)) {
    if (nchar(caller) > 30) caller <- paste0(substr(caller, 1, 27), "...")
  }
  caller
} # /rtemis::format_caller


#' Message with provenance
#'
#' Print message to output with a prefix including data and time, and calling function or full
#' call stack
#'
#' If `msg` is called directly from the console, it will print `[interactive>]` in place of
#'   the call stack.
#' `msg0`, similar to `paste0`, is `msg(..., sep = "")`
#'
#'
#' @param ... Message to print
#' @param caller Character: Name of calling function
#' @param call_depth Integer: Print the system call path of this depth.
#' @param caller_id Integer: Which function in the call stack to print
#' @param newline_pre Logical: If TRUE begin with a new line.
#' @param newline Logical: If TRUE end with a new line.
#' @param format_fn Function: Formatting function to use on the message text.
#' @param sep Character: Use to separate objects in `...`
#' @param verbosity Integer: Verbosity level of the message. If 0L, does not print anything and
#' returns NULL, invisibly.
#'
#' @return If verbosity > 0L, returns a list with call, message, and date, invisibly, otherwise
#' returns NULL invisibly.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' msg("Hello")
msg <- function(
  ...,
  caller = NULL,
  call_depth = 1L,
  caller_id = 1L,
  newline_pre = FALSE,
  newline = TRUE,
  format_fn = plain,
  sep = " ",
  verbosity = 1L
) {
  if (verbosity < 1L) {
    return(invisible(NULL))
  }
  if (is.null(caller)) {
    call_stack <- as.list(sys.calls())
    caller <- format_caller(call_stack, call_depth, caller_id)
  } # / get caller

  txt <- Filter(Negate(is.null), list(...))
  text <- paste(txt, collapse = sep)

  # Sink path: hand structured event to registered sink, skip console.
  if (.msg_to_sink(text, caller, datetime(), "info")) {
    return(invisible(NULL))
  }

  if (newline_pre) {
    message("")
  }
  msgdatetime()
  message(
    format_fn(text),
    appendLF = FALSE
  )
  if (!is.null(caller) && !is.na(caller) && nchar(caller) > 0L) {
    message(plain(gray(paste0(" [", caller, "]"))))
  } else if (newline) {
    message("")
  }
} # /rtemis::msg


#' @rdname msg
#'
#' @keywords internal
#' @noRd
msg0 <- function(
  ...,
  caller = NULL,
  call_depth = 1,
  caller_id = 1,
  newline_pre = FALSE,
  newline = TRUE,
  format_fn = plain,
  sep = "",
  verbosity = 1L
) {
  if (verbosity < 1L) {
    return(invisible(NULL))
  }
  if (is.null(caller)) {
    call_stack <- as.list(sys.calls())
    caller <- format_caller(call_stack, call_depth, caller_id)
  }

  txt <- Filter(Negate(is.null), list(...))
  text <- paste(txt, collapse = sep)

  if (.msg_to_sink(text, caller, datetime(), "info")) {
    return(invisible(NULL))
  }

  if (newline_pre) {
    message("")
  }
  msgdatetime()
  message(
    format_fn(text),
    appendLF = FALSE
  )
  if (!is.null(caller) && !is.na(caller) && nchar(caller) > 0L) {
    message(plain(gray(paste0(" [", caller, "]"))))
  } else if (newline) {
    message("")
  }
} # /rtemis::msg0


#' msgstart
#'
#' @inheritParams msg
#'
#' @author EDG
#' @keywords internal
#' @noRd
msgstart <- function(
  ...,
  newline_pre = FALSE,
  sep = ""
) {
  txt <- Filter(Negate(is.null), list(...))
  text <- paste(txt, collapse = sep)

  if (.msg_to_sink(text, NA_character_, datetime(), "start")) {
    return(invisible(NULL))
  }

  if (newline_pre) {
    message()
  }
  msgdatetime()
  message(plain(text), appendLF = FALSE)
} # /rtemis::msgstart


#' msgdone
#'
#' @inheritParams msg
#'
#' @author EDG
#' @keywords internal
#' @noRd
msgdone <- function(caller = NULL, call_depth = 1, caller_id = 1, sep = " ") {
  if (is.null(caller)) {
    call_stack <- as.list(sys.calls())
    caller <- format_caller(call_stack, call_depth, caller_id)
  }

  if (.msg_to_sink("done", caller, datetime(), "done")) {
    return(invisible(NULL))
  }

  message(" ", appendLF = FALSE)
  yay(end = " ")
  message(gray(paste0("[", caller, "]\n")), appendLF = FALSE)
} # /rtemis::msgdone


# %% Message sink API ---------------------------------------------------------

#' Set the rtemis message sink
#'
#' When set, `msg()`, `msg0()`, `msgstart()`, and `msgdone()` forward their
#' structured output through `sink` instead of writing to the R console. Used
#' by `rtemislive` to capture training-time messages and forward them over a
#' WebSocket connection. Pass `NULL` to restore default console output.
#'
#' The sink function is called once per message with a single argument: a list
#' with fields
#'
#' - `text`: character. The formatted message body (no datetime prefix).
#' - `caller`: character or `NA`. Calling function as identified by
#'   `format_caller()`.
#' - `ts`: character. Formatted timestamp (`"%Y-%m-%d %H:%M:%S"`).
#' - `level`: character. One of `"info"` (`msg`/`msg0`), `"start"`
#'   (`msgstart`), or `"done"` (`msgdone`).
#'
#' When a sink is set, the console output path is **skipped** for affected
#' calls. Errors thrown by the sink propagate to the caller of `msg()`.
#'
#' @param sink Function or `NULL`.
#'
#' @return Previous sink (function or `NULL`), invisibly.
#'
#' @author EDG
#' @export
#'
#' @seealso [get_msg_sink()], [with_msg_sink()].
#'
#' @examples
#' captured <- list()
#' set_msg_sink(function(m) captured[[length(captured) + 1L]] <<- m)
#' # msg("hello world")        # would append to `captured`
#' set_msg_sink(NULL)          # restore console output
set_msg_sink <- function(sink) {
  if (!is.null(sink) && !is.function(sink)) {
    cli::cli_abort("`sink` must be a function or NULL.")
  }
  old <- live[["msg_sink"]]
  live[["msg_sink"]] <- sink
  invisible(old)
} # /rtemis::set_msg_sink


#' Get the current rtemis message sink
#'
#' @return The currently registered sink function, or `NULL` if none is set.
#'
#' @author EDG
#' @export
#'
#' @seealso [set_msg_sink()], [with_msg_sink()].
get_msg_sink <- function() {
  live[["msg_sink"]]
} # /rtemis::get_msg_sink


#' Run code with a temporary message sink
#'
#' Sets `sink` for the duration of `code`, restoring the previous sink on exit
#' (including on error). Useful in tests and for short-lived capture.
#'
#' @param sink Sink function or `NULL`.
#' @param code Code to run.
#'
#' @return The value returned by `code`.
#'
#' @author EDG
#' @export
#'
#' @seealso [set_msg_sink()], [get_msg_sink()].
#'
#' @examples
#' captured <- list()
#' with_msg_sink(
#'   function(m) captured[[length(captured) + 1L]] <<- m,
#'   {
#'     # any msg() / msg0() / msgstart() / msgdone() calls in here are captured
#'   }
#' )
with_msg_sink <- function(sink, code) {
  old <- set_msg_sink(sink)
  on.exit(set_msg_sink(old), add = TRUE)
  force(code)
} # /rtemis::with_msg_sink


================================================
FILE: R/preprocess.R
================================================
# preprocess.R
# ::rtemis::
# 2017- EDG rtemis.org

# %% preprocess(x, PreprocessorConfig, ...) ----
#' @name
#' preprocess
#'
#' @param x data.frame, data.table, tbl_df (tabular data): Data to be preprocessed.
#' @param config `PreprocessorConfig`: Setup using [setup_Preprocessor] OR `Preprocessor` object:
#' Output of previous run of `preprocess`. This allows, for example, applying preprocessing to a
#' validation or test set using the same parameters as were used for the training set. In
#' particular, the same scale centers and coefficients will be applied to the new data.
#' @param dat_validation tabular data: Validation set data.
#' @param dat_test tabular data: Test set data.
#' @param verbosity Integer: Verbosity level.
#' @param ... Not used.
#'
#' @author EDG
#' @export
preprocess.class_tabular.PreprocessorConfig <- method(
  preprocess,
  list(class_tabular, PreprocessorConfig)
) <- function(
  x,
  config,
  dat_validation = NULL,
  dat_test = NULL,
  verbosity = 1L
) {
  # -> Preprocessor
  # Intro ----
  start_time <- intro(verbosity = verbosity - 1L)
  # Init values list for Preprocessor output.
  values <- list(
    scale_centers = NULL,
    scale_coefficients = NULL,
    one_hot_levels = NULL,
    remove_features = NULL
  )

  # Data
  isdatatable <- data.table::is.data.table(x)
  x <- as.data.frame(x)

  # Complete cases ----
  if (config@complete_cases) {
    if (verbosity > 0L) {
      msg("Filtering complete cases...")
    }
    x <- x[complete.cases(x), ]
  }

  # Set aside excluded ----
  if (!is.null(config@exclude) && length(config@exclude) > 0) {
    excluded <- x[, config@exclude, drop = FALSE]
    excluded_names <- colnames(x)[config@exclude]
    x <- x[, -config@exclude, drop = FALSE]
  }

  # Remove named features ----
  if (!is.null(config@remove_features)) {
    if (verbosity > 0L) {
      msg("Removing", length(config@remove_features), "features...")
    }
    values$remove_features <- config@remove_features
    x <- x[, !names(x) %in% config@remove_features, drop = FALSE]
  }

  # Remove constants ----
  # Must be ahead of numeric quantile at least
  if (config@remove_constants) {
    constant <- which(sapply(
      x,
      is_constant,
      skip_missing = config@remove_constants_skip_missing
    ))
    if (length(constant) > 0) {
      if (verbosity > 0L) {
        msg0(
          "Removing ",
          singorplu(length(constant), "constant feature"),
          "..."
        )
      }
      x <- x[, -constant]
    }
  }

  # Remove duplicates ----
  if (config@remove_duplicates) {
    # Ndups <- sum(duplicated(x))
    duplicate_index <- which(duplicated(x))
    Ndups <- length(duplicate_index)
    if (Ndups > 0) {
      if (verbosity > 0L) {
        msg0("Removing ", singorplu(Ndups, "duplicate case"), "...")
      }
      x <- unique(x)
    }
  } else {
    duplicate_index <- NULL
  }

  # Remove Cases by missing feature threshold ----
  if (!is.null(config@remove_cases_thres)) {
    if (anyNA(x)) {
      xt <- data.table::as.data.table(x)
      # na_fraction_bycase <- apply(x, 1, function(i) sum(is.na(i))/length(i))
      na_fraction_bycase <- data.table::transpose(xt)[, lapply(
        .SD,
        function(i) {
          sum(is.na(i)) / length(i)
        }
      )]
      index_remove_cases_thres <- which(
        na_fraction_bycase >= config@remove_cases_thres
      )
      if (length(index_remove_cases_thres) > 0) {
        if (verbosity > 0L) {
          msg(
            "Removing",
            length(index_remove_cases_thres),
            "cases with >=",
            config@remove_cases_thres,
            "missing data..."
          )
        }
        xt <- xt[-index_remove_cases_thres, ]
      }
      x <- as.data.frame(xt)
    }
  }

  # Remove Features by missing feature threshold ----
  if (!is.null(config@remove_features_thres)) {
    if (anyNA(x)) {
      xt <- data.table::as.data.table(x)
      na.fraction.byfeat <- xt[, lapply(.SD, function(i) {
        sum(is.na(i)) / length(i)
      })]
      removeFeat_thres_index <- which(
        na.fraction.byfeat >= config@remove_features_thres
      )
      if (length(removeFeat_thres_index) > 0) {
        if (verbosity > 0L) {
          msg(
            "Removing",
            length(removeFeat_thres_index),
            "features with >=",
            config@remove_features_thres,
            "missing data..."
          )
        }
        x <- x[, -removeFeat_thres_index]
      }
    }
  }

  # Integer to factor ----
  index_integer <- NULL
  if (config@integer2factor) {
    index_integer <- c(
      which(sapply(x, is.integer)),
      which(sapply(x, bit64::is.integer64))
    )
    if (verbosity > 0L) {
      if (length(index_integer) > 0) {
        msg(
          "Converting",
          singorplu(length(index_integer), "integer"),
          "to factor..."
        )
      } else {
        msg("No integers to convert to factor...")
      }
    }
    for (i in index_integer) {
      x[, i] <- as.factor(x[, i])
    }
  }

  # Logical to factor ----
  if (config@logical2factor) {
    index_logical <- which(sapply(x, is.logical))
    if (verbosity > 0L) {
      if (length(index_logical) > 0) {
        msg0(
          "Converting ",
          singorplu(length(index_logical), "logical feature"),
          " to ",
          ngettext(length(index_logical), "factor", "factors"),
          "..."
        )
      } else {
        msg("No logicals to convert to factor...")
      }
    }
    for (i in index_logical) {
      x[, i] <- as.factor(x[, i])
    }
  }

  # Numeric to factor ----
  if (config@numeric2factor) {
    index_numeric <- which(sapply(x, is.numeric))
    if (verbosity > 0L) {
      msg("Converting numeric to factors...")
    }
    if (is.null(config@numeric2factor_levels)) {
      for (i in index_numeric) {
        x[, i] <- as.factor(x[, i])
      }
    } else {
      for (i in index_numeric) {
        x[, i] <- factor(x[, i], levels = config@numeric2factor_levels)
      }
    }
  }

  # Character to factor ----
  if (config@character2factor) {
    index_char <- which(sapply(x, is.character))
    if (verbosity > 0L) {
      if (length(index_char) > 0) {
        msg0(
          "Converting ",
          singorplu(length(index_char), "character feature"),
          " to ",
          ngettext(length(index_char), "a factor", "factors"),
          "..."
        )
      } else {
        msg("No character features to convert to factors found.")
      }
    }
    for (i in index_char) {
      x[, i] <- as.factor(x[, i])
    }
  }

  # unique_len2factor ----
  if (config@unique_len2factor > 1) {
    index_len <- which(sapply(
      x,
      \(i) length(unique(i)) <= config@unique_len2factor
    ))
    # Exclude factors
    index_factor <- which(sapply(x, is.factor))
    index_len <- setdiff(index_len, index_factor)
    if (verbosity > 0L) {
      if (length(index_len) > 0) {
        msg(
          "Converting",
          singorplu(length(index_len), "feature"),
          "with <=",
          config@unique_len2factor,
          "unique values to factors..."
        )
      } else {
        msg(
          "No features with <=",
          config@unique_len2factor,
          "unique values found."
        )
      }
    }
    for (i in index_len) {
      x[, i] <- factor(x[, i])
    }
  }

  # Integer to numeric ----
  if (config@integer2numeric) {
    if (is.null(index_integer)) {
      index_integer <- c(
        which(sapply(x, is.integer)),
        which(sapply(x, bit64::is.integer64))
      )
    }
    if (verbosity > 0L) {
      if (length(index_integer) > 0) {
        msg(
          "Converting",
          singorplu(length(index_integer), "integer"),
          "to numeric..."
        )
      } else {
        msg("No integers to convert to numeric...")
      }
    }
    for (i in index_integer) {
      x[, i] <- as.numeric(x[, i])
    }
  }

  # Logical to numeric ----
  if (config@logical2numeric) {
    index_logical <- which(sapply(x, is.logical))
    if (verbosity > 0L) {
      msg("Converting logicals to numeric...")
    }
    for (i in index_logical) {
      x[, i] <- as.numeric(x[, i])
    }
  }

  # Numeric cut ----
  if (config@numeric_cut_n > 0) {
    index_numeric <- which(sapply(x, is.numeric))
    if (length(index_numeric) > 0) {
      if (verbosity > 0L) {
        msg("Cutting numeric features in", config@numeric_cut_n, "bins...")
      }
      for (i in index_numeric) {
        x[, i] <- factor(
          cut(
            x[, i],
            breaks = config@numeric_cut_n,
            labels = config@numeric_cut_labels
          )
        )
      }
    }
  }

  # Numeric quantile ----
  if (config@numeric_quant_n > 0) {
    index_numeric2q <- if (config@numeric_quant_nAonly) {
      index_numeric2q <- which(sapply(x, is.numeric) & sapply(x, anyNA))
    } else {
      which(sapply(x, is.numeric))
    }
    if (length(index_numeric2q) > 0) {
      if (verbosity > 0L) {
        msg(
          "Cutting numeric features in",
          config@numeric_quant_n,
          "quantiles..."
        )
      }
      for (i in index_numeric2q) {
        rng <- abs(diff(range(x[, i], na.rm = TRUE)))
        quantiles <- quantile(
          x[, i],
          probs = seq(0, 1, length.out = config@numeric_quant_n),
          na.rm = TRUE
        )
        quantiles[1] <- quantiles[1] - .02 * rng
        quantiles[config@numeric_quant_n] <- quantiles[
          config@numeric_quant_n
        ] +
          .02 * rng
        quantiles <- unique(quantiles)
        x[, i] <- factor(
          cut(
            x[, i],
            breaks = quantiles
          )
        )
      }
    }
  }

  # factor NA to level ----
  if (config@factorNA2missing) {
    index_factor <- which(sapply(x, is.factor))
    if (verbosity > 0L) {
      if (length(index_factor) > 0) {
        msg0(
          "Converting ",
          length(index_factor),
          ngettext(length(index_factor), " factor's", " factors'"),
          " NA values to level '",
          config@factorNA2missing_level,
          "'..."
        )
      } else {
        msg("No factors found.")
      }
    }
    for (i in index_factor) {
      x[, i] <- factor_NA2missing(x[, i], config@factorNA2missing_level)
    }
  }

  # Factor to integer ----
  # e.g. for algorithms that do not support factors directly, but can handle integers
  # as categorical (e.g. LightGBM)
  if (config@factor2integer) {
    index_factor <- which(sapply(x, is.factor))
    if (verbosity > 0L) {
      if (length(index_factor) > 0) {
        msg(
          "Converting",
          singorplu(length(index_factor), "factor"),
          "to integer..."
        )
      } else {
        msg("No factors found to convert to integer...")
      }
    }
    if (config@factor2integer_startat0) {
      for (i in index_factor) {
        x[, i] <- as.integer(x[, i]) - 1
      }
    } else {
      for (i in index_factor) {
        x[, i] <- as.integer(x[, i])
      }
    }
  }

  # Missingness ----
  if (config@missingness) {
    cols_with_na <- which(apply(x, 2, anyNA))
    .colnames <- colnames(x)
    for (i in cols_with_na) {
      x[, paste0(.colnames[i], "_missing")] <- factor(as.numeric(is.na(x[, i])))
      if (verbosity > 0L) {
        msg0("Created missingness indicator for ", .colnames[i], "...")
      }
    }
  }

  # Impute ----
  if (config@impute) {
    if (config@impute_type == "missRanger") {
      # '- missRanger ----
      check_dependencies("missRanger")
      if (verbosity > 0L) {
        if (config@impute_missRanger_params[["pmm.k"]] > 0) {
          msg(
            "Imputing missing values using predictive mean matching with missRanger..."
          )
        } else {
          msg("Imputing missing values using missRanger...")
        }
      }
      x <- missRanger::missRanger(
        x,
        pmm.k = config@impute_missRanger_params[["pmm.k"]],
        verbose = verbosity
      )
    } else if (config@impute_type == "micePMM") {
      check_dependencies("mice")
      if (verbosity > 0L) {
        msg(
          "Imputing missing values by predictive mean matching using mice..."
        )
      }
      x <- mice::complete(mice::mice(x, m = 1, method = "pmm"))
    } else {
      # '- mean/mode ----
      if (verbosity > 0L) {
        msg(
          "Imputing missing values using",
          config@impute_discrete,
          "(discrete) and",
          config@impute_continuous,
          "(continuous)..."
        )
      }

      index_discrete <- which(sapply(x, function(i) is_discrete(i) && anyNA(i)))
      if (length(index_discrete) > 0) {
        for (i in index_discrete) {
          index <- which(is.na(x[, i]))
          imputed <- do_call(
            config@impute_discrete,
            list(x[[i]], na.rm = TRUE)
          )
          x[index, i] <- imputed
        }
      }

      index_numeric <- which(sapply(x, function(i) is.numeric(i) && anyNA(i)))
      if (length(index_numeric) > 0) {
        for (i in index_numeric) {
          index <- which(is.na(x[, i]))
          imputed <- do_call(
            config@impute_continuous,
            list(x[[i]], na.rm = TRUE)
          )
          x[index, i] <- imputed
        }
      }
    }
  }

  # Scale +/- center ----
  if (config@scale || config@center) {
    # Get index of numeric features
    numeric_index <- which(sapply(x, is.numeric))
    sc <- if (config@scale) "Scaling" else NULL
    ce <- if (config@center) "centering" else NULL
    if (length(numeric_index) > 0) {
      if (verbosity > 0L) {
        msg(
          paste(c(sc, ce), collapse = " and "),
          length(numeric_index),
          "numeric features..."
        )
      }
      # Info: scale outputs a matrix.
      scale_ <- if (!is.null(config@scale_coefficients)) {
        # Check names match
        stopifnot(identical(
          names(config@scale_coefficients),
          names(x[, numeric_index])
        ))
        config@scale_coefficients
      } else {
        config@scale
      }
      center_ <- if (!is.null(config@scale_centers)) {
        # Check names match
        stopifnot(identical(
          names(config@scale_centers),
          names(x[, numeric_index])
        ))
        config@scale_centers
      } else {
        config@center
      }
      x_num_scaled <- scale(
        x[, numeric_index, drop = FALSE],
        scale = scale_,
        center = center_
      )

      # Collect scale and center values
      values$scale_centers <- attr(x_num_scaled, "scaled:center")
      values$scale_coefficients <- attr(x_num_scaled, "scaled:scale")

      x_num_scaled <- as.data.frame(x_num_scaled)

      # Insert into original dataset
      x[, numeric_index] <- x_num_scaled
      # j <- 0
      # for (i in numeric_index) {
      #   j <- j + 1
      #   x[, i] <- x_num_scaled[, j]
      # }
    } else {
      msg(
        paste(c(sc, ce), collapse = " and "),
        "was requested \n                                but no numeric features were found: Please check data."
      )
    }
  }

  # One Hot Encoding ----
  if (config@one_hot) {
    x <- one_hot(
      x,
      verbosity = verbosity,
      factor_levels = config@one_hot_levels
    )
  }

  # Add date features ----
  if (config@add_date_features) {
    if (verbosity > 0L) {
      msg("Extracting date features...")
    }
    # Find date columns
    date_cols <- which(sapply(x, function(col) inherits(col, "Date")))
    # For each date column, extract features
    for (i in date_cols) {
      .date_features <- dates2features(
        x[[i]],
        features = config@date_features
      )
      names(.date_features) <- paste0(names(x)[i], "_", names(.date_features))
      x <- cbind(x, .date_features)
    }
  }

  # Add holidays ----
  if (config@add_holidays) {
    if (verbosity > 0L) {
      msg("Extracting holidays...")
    }
    # Find date columns
    date_cols <- which(sapply(x, \(col) inherits(col, "Date")))
    # For each date column, extract holidays
    for (i in date_cols) {
      .holidays <- get_holidays(x[, i])
      x[[paste0(names(x)[i], "_holidays")]] <- .holidays
    }
  }

  # Add back excluded ----
  if (!is.null(config@exclude) && length(config@exclude) > 0) {
    # remove any duplicates
    if (!is.null(duplicate_index)) {
      excluded <- excluded[-duplicate_index, , drop = FALSE]
    }

    # remove by case thres
    if (
      !is.null(config@remove_cases_thres) &&
        length(index_remove_cases_thres) > 0
    ) {
      n_feat_inc <- NCOL(x)
      x <- cbind(x, excluded[-index_remove_cases_thres, ])
      colnames(x)[-c(seq(n_feat_inc))] <- excluded_names
    } else {
      x <- cbind(x, excluded)
    }
  } # /add back excluded

  if (isdatatable) {
    data.table::setDT(x)
  }
  if (verbosity > 0L) {
    msg("Preprocessing done.")
  }

  preprocessed <- list(training = x)

  if (!is.null(dat_validation)) {
    if (verbosity > 0L) {
      msg("Applying preprocessing to validation data...")
    }
    prp_validation <- preprocess(
      x = dat_validation,
      config = Preprocessor(
        config = config,
        preprocessed = list(),
        scale_centers = values[["scale_centers"]],
        scale_coefficients = values[["scale_coefficients"]],
        one_hot_levels = values[["one_hot_levels"]],
        remove_features = values[["remove_features"]]
      ),
      verbosity = verbosity
    )
    preprocessed$validation <- prp_validation@preprocessed
  }
  if (!is.null(dat_test)) {
    if (verbosity > 0L) {
      msg("Applying preprocessing to test data...")
    }
    prp_test <- preprocess(
      x = dat_test,
      config = Preprocessor(
        config = config,
        preprocessed = list(),
        scale_centers = values[["scale_centers"]],
        scale_coefficients = values[["scale_coefficients"]],
        one_hot_levels = values[["one_hot_levels"]],
        remove_features = values[["remove_features"]]
      ),
      verbosity = verbosity
    )
    preprocessed$test <- prp_test@preprocessed
  }
  outro(start_time, verbosity = verbosity - 1L)
  Preprocessor(
    config = config,
    preprocessed = if (length(preprocessed) == 1) {
      preprocessed[[1]]
    } else {
      preprocessed
    },
    scale_centers = values[["scale_centers"]],
    scale_coefficients = values[["scale_coefficients"]],
    one_hot_levels = values[["one_hot_levels"]],
    remove_features = values[["remove_features"]]
  )
} # /rtemis::preprocess(PreprocessorConfig, ...)


# %% preprocess(x, Preprocessor, ...) ----
#' @name
#' preprocess
#'
#' @author EDG
#' @export
preprocess.class_tabular.Preprocessor <- method(
  preprocess,
  list(class_tabular, Preprocessor)
) <- function(
  x,
  config,
  verbosity = 1L
) {
  # -> Preprocessor
  params <- config@config
  # Overwrite scale_centers, scale_coefficients, one_hot_levels, and remove_features
  params@scale_centers <- config@values[["scale_centers"]]
  params@scale_coefficients <- config@values[["scale_coefficients"]]
  params@one_hot_levels <- config@values[["one_hot_levels"]]
  params@remove_features <- config@values[["remove_features"]]

  preprocess(x, params, verbosity = verbosity)
} # /rtemis::preprocess(Preprocessor, ...)


# %% one_hot ----
#' @name one_hot
#'
#' @title
#' One hot encoding
#'
#' @description
#' One hot encode a vector or factors in a data.frame
#'
#' @details
#' A vector input will be one-hot encoded regardless of type by looking at all unique values. With data.frame input,
#' only column of type factor will be one-hot encoded.
#' This function is used by [preprocess].
#' `one_hot.data.table` operates on a copy of its input.
#' `one_hot_` performs one-hot encoding ***in-place***.
#'
#' @param x Vector or data.frame
#' @param xname Character: Variable name
#' @param verbosity Integer: Verbosity level.
#'
#' @return For vector input, a one-hot-encoded matrix, for data.frame frame
#' input, an expanded data.frame where all factors are one-hot encoded
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' # factor with only one unique value but 2 levels:
#' vf <- factor(rep("alpha", 20), levels = c("alpha", "beta"))
#' vf_one_hot <- one_hot(vf)
#' vf_one_hot
method(one_hot, class_any) <- function(x, xname = NULL, verbosity = 1L) {
  if (is.null(xname)) {
    xname <- deparse(substitute(x))
  }
  # ensures if factor without all levels present, gets all columns created
  if (!is.factor(x)) {
    x <- factor(x)
  }
  .levels <- levels(x)
  ncases <- NROW(x)
  index <- as.integer(x)
  oh <- matrix(0, ncases, length(.levels))
  colnames(oh) <- paste(xname, .levels, sep = "_")
  for (i in seq(ncases)) {
    oh[i, index[i]] <- 1
  }
  oh
} # /rtemis::one_hot.default


# included for benchmarking mostly
one_hotcm <- function(
  x,
  xname = deparse(substitute(x)),
  return = "data.frame"
) {
  stopifnot(is.factor(x))
  dt <- data.table(
    ID = seq_along(x),
    x = x
  )
  setnames(dt, "x", xname)
  out <- dcast(
    melt(dt, id.vars = "ID"),
    ID ~ variable + value,
    fun.aggregate = length
  )[, -1]
  if (return == "data.frame") {
    setDF(out)
  }
  out
}

# loop is faster than dcast/melt
# x <- iris$Species
# microbenchmark::microbenchmark(loop = one_hot.default(x), dt = one_hotcm(x))

# %% one_hot.data.frame ----
#' @rdname one_hot
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' one_hot(iris) |> head()
method(one_hot, class_data.frame) <- function(
  x,
  factor_levels = NULL,
  verbosity = 1L
) {
  ncases <- NROW(x)
  factor_index <- which(sapply(x, is.factor))
  # If factor_levels list is provided, check column names match
  if (!is.null(factor_levels)) {
    stopifnot(identical(names(factor_levels), colnames(x[, factor_index])))
  }
  one.hot <- as.list(x)
  if (verbosity > 0L) {
    .names <- colnames(x)
  }
  for (i in factor_index) {
    if (verbosity > 0L) {
      msgstart("One hot encoding ", .names[i], "...")
    }
    .levels <- if (!is.null(factor_levels)) {
      factor_levels[[i]]
    } else {
      levels(x[[i]])
    }
    index <- as.integer(x[, i])
    oh <- matrix(0, ncases, length(.levels))
    colnames(oh) <- paste0(names(x)[i], "_", .levels)
    for (j in seq(ncases)) {
      oh[j, index[j]] <- 1
    }
    # Replace list element that was a factor with one-hot encoded matrix
    one.hot[[i]] <- oh
  }
  if (verbosity > 0L) {
    msgdone()
  }
  # do.call below creates a matrix, maintaining column names in one.hot matrix.
  # as.data.frame on one.hot would have added {name_of_oh_element}.{column_names}
  as.data.frame(do.call(cbind, one.hot))
} # /rtemis::one_hot.data.frame


# %% one_hot.data.table ----
#' @rdname one_hot
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' ir <- data.table::as.data.table(iris)
#' ir_oh <- one_hot(ir)
#' ir_oh
method(one_hot, class_data.table) <- function(x, verbosity = 1L) {
  x <- copy(x)
  ncases <- NROW(x)
  factor_index <- which(sapply(x, is.factor))
  .names <- colnames(x)
  for (i in factor_index) {
    if (verbosity > 0L) {
      msg_info("One hot encoding ", .names[i], "...")
    }
    .levels <- levels(x[[i]])
    index <- as.integer(x[[i]])
    oh <- as.data.table(matrix(0, ncases, length(.levels)))
    .colnames <- colnames(oh) <- .levels
    for (k in seq_along(.levels)) {
      oh[index == k, (.colnames[k]) := 1]
    }
    x[, (paste(.names[i], .levels, sep = "_")) := oh]
  }
  # remove original factor(s)
  x[, paste(.names[factor_index]) := NULL]
  if (verbosity > 0L) {
    msg("Done")
  }
  invisible(x)
} # /rtemis::one_hot.data.table


#' Convert data.table's factor to one-hot encoding ***in-place***
#'
#' @param x data.table: Input data.table. Will be modified ***in-place***.
#' @param xname Character, optional: Dataset name.
#' @param verbosity Integer: Verbosity level.
#'
#' @return The input, invisibly, after it has been modified ***in-place***.
#'
#' @author EDG
#' @export
#'
#' @examples
#' ir <- data.table::as.data.table(iris)
#' # dt_set_one_hot operates ***in-place***; therefore no assignment is used:
#' dt_set_one_hot(ir)
#' ir
dt_set_one_hot <- function(x, xname = NULL, verbosity = 1L) {
  if (is.null(xname)) {
    xname <- deparse(substitute(x))
  }
  ncases <- NROW(x)
  factor_index <- which(sapply(x, is.factor))
  .names <- colnames(x)
  for (i in factor_index) {
    if (verbosity > 0L) {
      msg_info("One hot encoding ", .names[i], "...")
    }
    .levels <- levels(x[[i]])
    index <- as.numeric(x[[i]])
    oh <- as.data.table(matrix(0, ncases, length(.levels)))
    .colnames <- colnames(oh) <- paste(xname, .levels, sep = "_")
    for (k in seq_along(.levels)) {
      oh[index == k, (.colnames[k]) := 1]
    }
    x[, (paste(.names[i], .levels, sep = "_")) := oh]
  }
  # remove original factor(s)
  x[, paste(.names[factor_index]) := NULL]
  if (verbosity > 0L) {
    msg("Done")
  }
  invisible(x)
} # /rtemis::dt_set_one_hot


#' Convert one-hot encoded matrix to factor
#'
#' @details If input has a single column, it will be converted to factor and
#' returned
#'
#' @param x one-hot encoded matrix or data.frame.
#' @param labels Character vector of level names.
#'
#' @return A factor.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- data.frame(matrix(FALSE, 10, 3))
#' colnames(x) <- c("Dx1", "Dx2", "Dx3")
#' x$Dx1[1:3] <- x$Dx2[4:6] <- x$Dx3[7:10] <- TRUE
#' one_hot2factor(x)
one_hot2factor <- function(x, labels = colnames(x)) {
  if (NCOL(x) == 1) {
    return(factor(x))
  }
  if (any(na.exclude(rowSums(x)) > 1)) {
    cli::cli_abort("Input must be one-hot encoded.")
  }
  out <- factor(rep(NA, NROW(x)), levels = labels)
  for (i in seq_along(labels)) {
    out[x[, i] == 1] <- labels[i]
  }
  out
} # /rtemis::one_hot2factor


#' Binary matrix times character vector
#'
#' @param x A binary matrix or data.frame
#' @param labels Character vector length equal to `ncol(x)`
#'
#' @return a character vector
#'
#' @author EDG
#' @export
`%BC%` <- function(x, labels) {
  if (NCOL(x) == 1) {
    return(factor(x))
  }
  dt <- as.data.table(x)
  fn <- \(r) paste(unique(labels[which(r == 1)]), collapse = ",")
  out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))][[2]]
  out[out == ""] <- NA
  out
} # /rtemis::`%BC%`


#' Binary matrix to list vector
#'
#' @author EDG
#' @keywords internal
#' @noRd
binmat2lvec <- function(x, labels = colnames(x), return.list = FALSE) {
  if (NCOL(x) == 1) {
    return(factor(x))
  }
  dt <- as.data.table(x)
  if (return.list) {
    fn <- \(r) list(labels[which(r == 1)])
    out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))][[2]]
    out[sapply(out, length) == 0] <- NA
  } else {
    fn <- \(r) paste(unique(labels[which(r == 1)]), collapse = ",")
    out <- dt[, list(fn(.SD)), by = seq_len(NROW(dt))]
    out[out == ""] <- NA
  }
  out
} # /rtemis::binmat2lvec


# %% feature_matrix ----
#' Convert tabular data to feature matrix
#'
#' Convert a tabular dataset to a matrix, one-hot encoding factors, if present.
#'
#' @details
#' This is a convenience function that uses  [features()], [preprocess()], `as.matrix()`.
#'
#' @param x tabular data: Input data to convert to a feature matrix.
#'
#' @return Matrix with features. Factors are one-hot encoded, if present.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # reorder columns so that we have a categorical feature
#' x <- set_outcome(iris, "Sepal.Length")
#' feature_matrix(x) |> head()
feature_matrix <- function(x) {
  x |>
    features() |>
    preprocess(setup_Preprocessor(one_hot = TRUE)) |>
    preprocessed() |>
    as.matrix()
} # /rtemis::feature_matrix


================================================
FILE: R/present.R
================================================
# present.R
# ::rtemis::
# 2025 EDG rtemis.org

#' Present list of Supervised or SupervisedRes objects
#'
#' Plot training and testing performance boxplots of multiple `Supervised` or `SupervisedRes` objects
#'
#' @param x List of `Supervised` or `SupervisedRes` objects.
#' @param metric Character: Metric to plot.
#' @param model_names Character: Names of models being plotted.
#' @param ylim Numeric vector of length 2: y-axis limits for the boxplots.
#' @param theme `Theme` object.
#' @param boxpoints Character: "all", "outliers", or "suspectedoutliers". Determines how points are
#' displayed in the boxplot.
#' @param filename Character: Filename to save the plot to.
#' @param file_width Numeric: Width of the exported image in pixels.
#' @param file_height Numeric: Height of the exported image in pixels.
#' @param file_scale Numeric: Scale factor for the exported image.
#' @param verbosity Integer: Verbosity level.
#'
#' @return plotly object
#'
#' @author EDG
#' @noRd
#'
#' @examples
#' \dontrun{
#' iris_lightrf <- train(
#'   iris,
#'   algorithm = "lightrf",
#'   outer_resampling_config = setup_Resampler(seed = 2026)
#' )
#' iris_rsvm <- train(
#'   iris,
#'   algorithm = "radialsvm",
#'   outer_resampling_config = setup_Resampler(seed = 2026)
#' )
#' present(list(iris_lightrf, iris_rsvm), metric = "Balanced_Accuracy")
#' }
method(present, class_list) <- function(
  x,
  metric = NULL,
  model_names = NULL,
  ylim = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  boxpoints = "all",
  filename = NULL,
  file_width = 800,
  file_height = 600,
  file_scale = 1,
  verbosity = 1L
) {
  # Check that all elements of x are either Supervised or SupervisedRes objects
  all_supervised <- all(sapply(x, function(m) {
    S7_inherits(m, Supervised)
  }))
  all_supervisedres <- all(sapply(x, function(m) {
    S7_inherits(m, SupervisedRes)
  }))

  if (!(all_supervised || all_supervisedres)) {
    cli::cli_abort(
      "Input must be a list of Supervised or SupervisedRes objects."
    )
  }

  # Check all models are of the same type
  type <- unique(sapply(x, function(m) m@type))
  if (length(type) > 1) {
    cli::cli_abort("All models must be of the same type")
  }

  # Describe
  if (verbosity > 0L) {
    describe(x)
  }

  # Get names
  if (is.null(model_names)) {
    model_names <- sapply(x, function(m) {
      m@algorithm
    })
  }

  # If any names are duplicated, append a number
  if (any(duplicated(model_names))) {
    model_names <- make.unique(model_names, sep = "_")
  }

  # Metric
  if (is.null(metric)) {
    metric <- switch(
      type,
      Classification = "Balanced_Accuracy",
      Regression = "Rsq"
    )
  }

  # Data
  xl_training <- lapply(x, function(m) {
    get_metric(m, set = "training", metric = metric)
  })
  xl_test <- lapply(x, function(m) {
    get_metric(m, set = "test", metric = metric)
  })
  names(xl_training) <- names(xl_test) <- model_names

  # Plots
  if (all_supervisedres) {
    # Get ylim
    if (is.null(ylim)) {
      ylim <- range(c(xl_training, xl_test), na.rm = TRUE)
    }
    plot_training <- draw_box(
      xl_training,
      ylab = labelify(paste("Training", metric)),
      ylim = ylim,
      theme = theme,
      boxpoints = boxpoints
    )
    plot_test <- draw_box(
      xl_test,
      ylab = labelify(paste("Test", metric)),
      ylim = ylim,
      theme = theme,
      boxpoints = boxpoints
    )
    plt <- plotly::subplot(
      plot_training,
      plot_test,
      nrows = 2L,
      shareX = TRUE,
      shareY = FALSE,
      titleX = TRUE,
      titleY = TRUE,
      margin = 0.05
    )
  } else {
    # rows are groups, columns are features
    xdf_training <- as.data.frame(xl_training)
    xdf_test <- as.data.frame(xl_test)
    xdf <- t(rbind(xdf_training, xdf_test))
    colnames(xdf) <- c("Training", "Test")
    plt <- draw_bar(xdf, ylab = labelify(metric), theme = theme)
  }

  if (!is.null(filename)) {
    export_plotly(
      plt,
      filename = filename,
      width = file_width,
      height = file_height,
      scale = file_scale
    )
  }
  plt
} # /rtemis::present.list


================================================
FILE: R/read.R
================================================
# read.R
# ::rtemis::
# 2022- EDG rtemis.org

# %% read ----
#' Read tabular data from a variety of formats
#'
#' Read data and optionally clean column names, keep unique rows, and convert
#' characters to factors
#'
#' @details
#' `read` is a convenience function to read:
#'
#' - **Delimited** files using `data.table:fread()`, `arrow:read_delim_arrow()`,
#'   `vroom::vroom()`, or `duckdb::duckdb_read_csv()`
#' - **ARFF** files using `farff::readARFF()`
#' - **Parquet** files using `arrow::read_parquet()`
#' - **XLSX** files using `readxl::read_excel()`
#' - **DTA** files from Stata using `haven::read_dta()`
#' - **FASTA** files using `seqinr::read.fasta()`
#' - **RDS** files using `readRDS()`
#'
#' @param filename Character: filename or full path if `datadir = NULL`.
#' @param datadir Character: Optional path to directory where `filename`
#' is located. If not specified, `filename` must be the full path.
#' @param make_unique Logical: If TRUE, keep unique rows only.
#' @param character2factor Logical: If TRUE, convert character variables to
#' factors.
#' @param clean_colnames Logical: If TRUE, clean columns names using
#' [clean_colnames].
#' @param delim_reader Character: package to use for reading delimited data.
#' @param xlsx_sheet Integer or character: Name or number of XLSX sheet to read.
#' @param sep Single character: field separator. If `delim_reader = "fread"`
#' and `sep = NULL`, this defaults to "auto", otherwise defaults to ",".
#' @param quote Single character: quote character.
#' @param na_strings Character vector: Strings to be interpreted as NA values.
#' For `delim_reader = "duckdb"`, this must be a single string.
#' @param output Character: "default" or "data.table", If default, return the delim_reader's
#' default data structure, otherwise convert to data.table.
#' @param attr Character: Attribute to set (Optional).
#' @param value Character: Value to set (if `attr` is not NULL).
#' @param verbosity Integer: Verbosity level.
#' @param fread_verbosity Integer: Verbosity level. Passed to `data.table::fread`
#' @param timed Logical: If TRUE, time the process and print to console
#' @param ... Additional arguments to pass to `data.table::fread`,
#' `arrow::read_delim_arrow()`, `vroom::vroom()`,
#' or `readxl::read_excel()`.
#'
#' @return data.frame, data.table, or tibble.
#'
#' @author EDG
#' @export
#'
#' @examples
#' \dontrun{
#' # Replace with your own data directory and filename
#' datadir <- "/Data"
#' dat <- read("iris.csv", datadir)
#' }
read <- function(
  filename,
  datadir = NULL,
  make_unique = FALSE,
  character2factor = FALSE,
  clean_colnames = TRUE,
  delim_reader = c("data.table", "vroom", "duckdb", "arrow"),
  xlsx_sheet = 1,
  sep = NULL,
  quote = "\"",
  na_strings = c(""),
  output = c("data.table", "tibble", "data.frame"),
  attr = NULL,
  value = NULL,
  verbosity = 1L,
  fread_verbosity = 0L,
  timed = verbosity > 0L,
  ...
) {
  check_dependencies("data.table")
  if (timed) {
    start_time <- intro(verbosity = 0L)
  }
  delim_reader <- match.arg(delim_reader)
  output <- match.arg(output)
  if (output == "tibble") {
    check_dependencies("tibble")
  }
  ext <- tools::file_ext(filename)
  path <- if (is.null(datadir)) {
    filename
  } else {
    file.path(datadir, filename)
  }
  path <- path.expand(path)
  # Sanitize path for security
  path <- sanitize_path(path, must_exist = FALSE)

  if (ext == "parquet") {
    check_dependencies("arrow")
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using arrow::read_parquet()...",
      verbosity = verbosity
    )
    .dat <- arrow::read_parquet(path, ...)
  } else if (ext == "rds") {
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      "...",
      verbosity = verbosity
    )
    .dat <- readRDS(path)
  } else if (ext == "xlsx") {
    check_dependencies("readxl")
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using readxl::read_excel()...",
      verbosity = verbosity
    )
    .dat <- readxl::read_excel(
      path,
      sheet = xlsx_sheet,
      na = na_strings,
      ...
    )
  } else if (ext == "dta") {
    check_dependencies("haven")
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using haven::read_dta()...",
      verbosity = verbosity
    )
    .dat <- haven::read_dta(path, ...)
  } else if (ext == "fasta") {
    check_dependencies("seqinr")
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using seqinr::read.fasta()...",
      verbosity = verbosity
    )
    .dat <- seqinr::read.fasta(path, ...)
    # if single sequence, return as character
    if (length(.dat) == 1) {
      .dat <- as.character(.dat[[1]])
    }
    return(.dat)
  } else if (ext == "arff") {
    check_dependencies("farff")
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using farff::readARFF()...",
      verbosity = verbosity
    )
    .dat <- farff::readARFF(path, ...)
  } else {
    msg0(
      bold(highlight("\u25B6")),
      " Reading ",
      highlight(basename(path)),
      " using ",
      delim_reader,
      "...",
      verbosity = verbosity
    )
    if (delim_reader == "data.table") {
      if (is.null(sep)) {
        sep <- "auto"
      }
      .dat <- data.table::fread(
        path,
        sep = sep,
        quote = quote,
        na.strings = na_strings,
        verbose = fread_verbosity > 0L,
        ...
      )
    } else if (delim_reader == "duckdb") {
      check_dependencies("DBI", "duckdb")
      if (is.null(sep)) {
        sep <- ","
      }
      if (length(na_strings) > 1) {
        msg(
          "Note: 'na_strings' must be a single string for duckdb; setting to '",
          na_strings[1],
          "'"
        )
        na_strings <- na_strings[1]
      }
      con <- DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:")
      on.exit(DBI::dbDisconnect(con, shutdown = TRUE), add = TRUE)
      duckdb::duckdb_read_csv(
        con,
        "data",
        path,
        header = TRUE,
        na.strings = na_strings,
        nrow.check = 500,
        delim = sep,
        quote = quote,
        ...
      )
      .dat <- DBI::dbReadTable(con, "data")
    } else if (delim_reader == "arrow") {
      check_dependencies("arrow")
      if (is.null(sep)) {
        sep <- ","
      }
      .dat <- arrow::read_delim_arrow(
        path,
        delim = sep,
        quote = quote,
        na = na_strings,
        ...
      )
    } else {
      check_dependencies("vroom")
      .dat <- vroom::vroom(
        path,
        delim = sep,
        quote = quote,
        na = na_strings,
        progress = verbosity > 0L,
        ...
      )
    }
  }

  .nrow <- nrow(.dat)
  .ncol <- ncol(.dat)
  msg(
    "Read in",
    highlightbig(.nrow),
    "x",
    highlightbig(.ncol),
    verbosity = verbosity
  )
  if (make_unique) {
    .dat <- unique(.dat)
    .nrowp <- nrow(.dat)
    .dup <- .nrow - .nrowp
    if (verbosity > 0L && .dup > 0) {
      msg(
        "Removed",
        bold(orange(format(.dup, big.mark = ","))),
        "duplicate",
        paste0(ngettext(.dup, "row", "rows"), ".")
      )
      msg(
        "New dimensions:",
        highlightbig(.nrowp),
        "x",
        highlightbig(.ncol)
      )
    }
  }

  if (clean_colnames) {
    setnames(.dat, names(.dat), clean_colnames(.dat))
  }

  if (character2factor) {
    .dat <- preprocess(
      .dat,
      setup_Preprocessor(character2factor = TRUE)
    )[["preprocessed"]]
  }

  if (!is.null(attr) && !is.null(value)) {
    for (i in seq_len(ncol(.dat))) {
      setattr(.dat[[i]], attr, value)
    }
  }

  if (timed) {
    outro(start_time)
  }

  # Set output structure
  if (output == "data.table") {
    if (!is.data.table(.dat)) setDT(.dat)
  } else if (output == "tibble") {
    .dat <- tibble::as_tibble(.dat)
  } else if (output == "data.frame") {
    if (!is.data.frame(.dat)) {
      .dat <- as.data.frame(.dat)
    } else {
      setDF(.dat)
    }
  }

  .dat
} # /rtemis::read


================================================
FILE: R/resample.R
================================================
# resample.R
# ::rtemis::
# 2015- EDG rtemis.org

#' Resample data
#'
#' Create resamples of your data, e.g. for model building or validation.
#' "KFold" creates stratified folds, , "StratSub" creates stratified subsamples,
#' "Bootstrap" gives the standard bootstrap, i.e. random sampling with replacement,
#' while "StratBoot" uses StratSub and then randomly duplicates some of the training cases to
#' reach original length of input (default) or length defined by `target_length`.
#'
#' Note that option 'KFold' may result in resamples of slightly different length. Avoid all
#' operations which rely on equal-length vectors. For example, you can't place resamples in a
#' data.frame, but must use a list instead.
#'
#' @param x Vector or data.frame: Usually the outcome; `NROW(x)` defines the sample size.
#' @param config Resampler object created by [setup_Resampler].
#' @param verbosity Integer: Verbosity level.
#'
#' @return `Resampler` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' y <- rnorm(200)
#' # 10-fold (stratified)
#' y_10fold <- resample(y, setup_Resampler(10L, "kfold"))
#' y_10fold
#' # 25 stratified subsamples
#' y_25strat <- resample(y, setup_Resampler(25L, "stratsub"))
#' y_25strat
#' # 100 stratified bootstraps
#' y_100strat <- resample(y, setup_Resampler(100L, "stratboot"))
#' y_100strat
#' # LOOCV
#' y_loocv <- resample(y, setup_Resampler(type = "LOOCV"))
#' y_loocv
resample <- function(
  x,
  config = setup_Resampler(),
  #  index = NULL,
  #  group = NULL,
  verbosity = 1L
) {
  check_is_S7(config, ResamplerConfig)
  # Input ----
  type <- config@type
  if (NCOL(x) > 1) {
    if (survival::is.Surv(x)) {
      msg("Survival object will be stratified on time.", verbosity = verbosity)
      x <- x[, 1]
    } else {
      msg(
        "Input contains more than one column; stratifying on last.",
        verbosity = verbosity
      )
      x <- x[[NCOL(x)]]
    }
  }

  # Stratify on case IDs ----
  id_strat <- if (type != "LOOCV") {
    config@id_strat
  } else {
    NULL
  }

  if (!is.null(id_strat)) {
    # Only keep unique IDs
    idl <- !duplicated(id_strat)
    x <- x[idl]
  }

  if (type == "StratBoot") {
    target_length <- if (is.null(config@target_length)) {
      NROW(x)
    } else {
      config@target_length
    }
  }

  # resample ----
  if (!type %in% c("Bootstrap", "LOOCV")) {
    .stratify_var <- if (is.null(config@stratify_var)) {
      x
    } else {
      config@stratify_var
    }
  }

  n_resamples <- if (type == "LOOCV") length(x) else config@n

  # Print config ----
  if (verbosity > 1L) {
    print(config)
  }

  # Make resamples ----
  if (type == "StratSub") {
    ## StratSub ----
    res_part <- strat_sub(
      x = x,
      n_resamples = n_resamples,
      train_p = config@train_p,
      stratify_var = .stratify_var,
      strat_n_bins = config@strat_n_bins,
      seed = config@seed,
      verbosity = verbosity
    )
  } else if (type == "Bootstrap") {
    ## Bootstrap ----
    res_part <- bootstrap(
      x = x,
      n_resamples = n_resamples,
      seed = config@seed
    )
  } else if (type == "KFold") {
    ## KFold ----
    res_part <- kfold(
      x = x,
      k = n_resamples,
      stratify_var = .stratify_var,
      strat_n_bins = config@strat_n_bins,
      seed = config@seed,
      verbosity = verbosity
    )
  } else if (type == "LOOCV") {
    ## LOOCV ----
    res_part <- loocv(x = x)
    # Get number of resamples
    config@n <- length(res_part)
  } else if (type == "StratBoot") {
    ## StratBoot ----
    res_part <- strat_boot(
      x = x,
      n_resamples = n_resamples,
      train_p = config@train_p,
      stratify_var = .stratify_var,
      strat_n_bins = config@strat_n_bins,
      target_length = target_length,
      seed = config@seed,
      verbosity = verbosity
    )
  }

  # Update strat_n_bins ----
  if (type == "StratSub" || type == "StratBoot") {
    actual_n_bins <- attr(res_part, "strat_n_bins")
    if (actual_n_bins != config@strat_n_bins) {
      if (verbosity > 0L) {
        msg0(
          "Updated strat_n_bins from ",
          config@strat_n_bins,
          " to ",
          actual_n_bins,
          " in ResamplerConfig object."
        )
      }
      config@strat_n_bins <- actual_n_bins
    }
  }

  if (!is.null(id_strat)) {
    ### Get ID by resample ----
    id_by_res <- lapply(res_part, \(x) id_strat[idl][x])
    ### Get resamples on original data with replicates ----
    res_part <- lapply(id_by_res, \(x) which(id_strat %in% x))
  }

  # Output ----
  Resampler(type, res_part, config)
} # /rtemis::resample


#' Bootstrap Resampling
#'
#' @param x Input vector.
#' @param n_resamples Integer: Number of resamples to make.
#' @param seed Integer: If provided, set seed for reproducibility.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
bootstrap <- function(x, n_resamples = 10, seed = NULL) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  ids <- seq_along(x)
  .length <- length(x)
  if (!is.null(seed)) {
    set.seed(seed)
  }

  res <- lapply(
    seq(n_resamples),
    function(i) sort(sample(ids, .length, replace = TRUE))
  )
  names(res) <- paste0("Bootsrap_", seq(n_resamples))
  res
} # /rtemis::bootstrap


#' K-fold Resampling
#'
#' @inheritParams resample
#' @param x Input Vector.
#' @param k Integer: Number of folds.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
kfold <- function(
  x,
  k = 10,
  stratify_var = NULL,
  strat_n_bins = 4,
  seed = NULL,
  verbosity = TRUE
) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  if (is.null(stratify_var)) {
    stratify_var <- x
  }
  stratify_var <- as.numeric(stratify_var)
  # ->> update
  max.bins <- length(unique(stratify_var))
  if (max.bins < strat_n_bins) {
    if (max.bins == 1) {
      cli::cli_abort("Only one unique value present in stratify_var.")
    }
    if (verbosity > 0L) {
      msg0("Using max n bins possible = ", max.bins, ".")
    }
    strat_n_bins <- max.bins
  }

  ids <- seq_along(x)
  # cuts
  cuts <- cut(stratify_var, breaks = strat_n_bins, labels = FALSE)
  cut.bins <- sort(unique(cuts))

  # ids by cut
  idl <- lapply(seq_along(cut.bins), function(i) ids[cuts == cut.bins[i]])
  # length of each cut
  # idl.length <- sapply(idl, length)
  idl.length <- as.numeric(table(cuts))

  # split each idl into k folds after randomizing them
  idl.k <- vector("list", length(cut.bins))
  for (i in seq_along(cut.bins)) {
    cut1 <- cut(sample(idl.length[i]), breaks = k, labels = FALSE)
    idl.k[[i]] <- lapply(seq(k), function(j) idl[[i]][cut1 == j])
  }

  res <- lapply(
    seq(k),
    \(i) {
      seq(ids)[-sort(unlist(lapply(seq_along(cut.bins), \(j) idl.k[[j]][[i]])))]
    }
  )

  names(res) <- paste0("Fold_", seq(k))
  attr(res, "strat_n_bins") <- strat_n_bins
  res
} # /rtemis::kfold


#' Resample using Stratified Subsamples
#'
#' @inheritParams resample
#' @param x Input vector
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
strat_sub <- function(
  x,
  n_resamples = 10,
  train_p = .75,
  stratify_var = NULL,
  strat_n_bins = 4,
  seed = NULL,
  verbosity = TRUE
) {
  if (!is.null(seed)) {
    set.seed(seed)
  }
  if (is.null(stratify_var)) {
    stratify_var <- x
  }
  stratify_var <- as.numeric(stratify_var)
  max.bins <- length(unique(stratify_var))
  if (max.bins < strat_n_bins) {
    if (verbosity > 0L) {
      msg("Using max n bins possible =", max.bins)
    }
    strat_n_bins <- max.bins
  }
  ids <- seq_along(x)
  cuts <- cut(stratify_var, breaks = strat_n_bins, labels = FALSE)
  cut.bins <- sort(unique(cuts))
  idl <- lapply(seq_along(cut.bins), function(i) ids[cuts == cut.bins[i]])
  idl.length <- as.numeric(table(cuts))
  res <- lapply(seq(n_resamples), function(i) {
    sort(unlist(sapply(seq_along(cut.bins), function(j) {
      sample(idl[[j]], train_p * idl.length[j])
    })))
  })
  names(res) <- paste0("Subsample_", seq(n_resamples))
  attr(res, "strat_n_bins") <- strat_n_bins
  res
} # /rtemis::strat_sub


#' Stratified Bootstrap Resampling
#'
#' @inheritParams resample
#' @param x Input vector
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
strat_boot <- function(
  x,
  n_resamples = 10,
  train_p = .75,
  stratify_var = NULL,
  strat_n_bins = 4,
  target_length = NULL,
  seed = NULL,
  verbosity = TRUE
) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  res_part1 <- strat_sub(
    x = x,
    n_resamples = n_resamples,
    train_p = train_p,
    stratify_var = stratify_var,
    strat_n_bins = strat_n_bins,
    verbosity = verbosity
  )

  # Make sure target_length was not too short by accident
  res.length <- length(res_part1[[1]])
  if (is.null(target_length)) {
    target_length <- length(x)
  }
  if (target_length < res.length) {
    target_length <- length(x)
  }

  # Add back this many cases
  add.length <- target_length - res.length
  doreplace <- ifelse(add.length > res.length, 1, 0)
  res_part2 <- lapply(
    res_part1,
    function(i) sample(i, add.length, replace = doreplace)
  )
  res <- mapply(c, res_part1, res_part2, SIMPLIFY = FALSE)
  res <- lapply(res, sort)
  names(res) <- paste0("StratBoot_", seq(n_resamples))
  attr(res, "strat_n_bins") <- strat_n_bins
  res
} # /rtemis::strat_boot


#' Leave-one-out Resampling
#'
#' @param x Input vector
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
loocv <- function(x) {
  res <- lapply(seq(x), function(i) (seq(x))[-i])
  names(res) <- paste0("Fold_", seq(res))
  res
} # /rtemis::loocv


================================================
FILE: R/rtemis-package.R
================================================
# rtemis-package.R
# ::rtemis::
# 2015- EDG rtemis.org

#' \pkg{rtemis}: Advanced Machine Learning and Visualization
#'
#' @description
#' Advanced Machine Learning & Visualization made efficient, accessible, reproducible
#'
#' @section Online Documentation and Vignettes:
#' <https://docs.rtemis.org/r/ml>
#'
#' @section System Setup:
#' There are some options you can define in your .Rprofile (usually found in your home directory),
#' so you do not have to define each time you execute a function.
#' \describe{
#'     \item{rtemis_theme}{General plotting theme; set to e.g. "whiteigrid" or "darkgraygrid"}
#'     \item{rtemis_font}{Font family to use in plots.}
#'     \item{rtemis_palette}{Name of default palette to use in plots. See options by running `get_palette()`}
#' }
#' @section Visualization:
#' Graphics are handled using the `draw` family, which produces interactive plots primarily using
#' `plotly` and other packages.
#'
#' @section Supervised Learning:
#' By convention, the last column of the data is the outcome variable, and all other columns are
#' predictors. Convenience function [set_outcome] can be used to move a specified column to the
#' end of the data.
#' Regression and Classification is performed using `train()`.
#' This function allows you to preprocess, train, tune, and test models on multiple resamples.
#' Use [available_supervised] to get a list of available algorithms
#'
#' @section Classification:
#' For training of binary classification models, the outcome should be provided as a factor,
#' with the *second* level of the factor being the 'positive' class.
#'
#' @section Clustering:
#' Clustering is performed using `cluster()`.
#' Use [available_clustering] to get a list of available algorithms.
#'
#' @section Decomposition:
#' Decomposition is performed using `decomp()`.
#' Use [available_decomposition] to get a list of available algorithms.
#'
#' @section Type Documentation:
#' Function documentation includes input type (e.g. "Character", "Integer",
#' "Float"/"Numeric", etc).
#' When applicable, value ranges are provided in interval notation. For example, Float: [0, 1)
#' means floats between 0 and 1 including 0, but excluding 1.
#' Categorical variables may include set of allowed values using curly braces.
#' For example, Character: \{"future", "mirai", "none"\}.
#'
#' @section Tabular Data:
#' \pkg{rtemis} internally uses methods for efficient handling of tabular data, with support for
#' `data.frame`, `data.table`, and `tibble`. If a function is documented as accepting
#' "tabular data", it should work with any of these data structures. If a function is documented
#' as accepting only one of these, then it should only be used with that structure.
#' For example, some optimized `data.table` operations that perform in-place modifications only
#' work with `data.table` objects.
#'
#' @name rtemis-package
#' @import stats methods graphics grDevices S7 data.table htmltools
#' @importFrom utils packageVersion sessionInfo getFromNamespace head tail
"_PACKAGE"

NULL


================================================
FILE: R/rtemis_color_system.R
================================================
#' rtemis Color System
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
rtemis_light_teal <- "#00fdfd"
rtemis_light_blue <- "#30cefe"
rtemis_teal <- "#00b2b2"
kaimana_red <- "#ff004c"
kaimana_blue <- "#0067e0"
kaimana_light_blue <- "#479cff"
coastside_orange <- "#ff9f20"
rtemis_orange <- "#ff4f36"
kaimana_green <- "#00ffb3"
kaimana_med_green <- "#00996b"
rtemis_purple <- "#6125f7"
rtemis_magenta <- "#912ac8"
rtemis_magenta_light <- "#b25bd6"
magenta <- "#ff00ff"
lmd_burgundy <- "#a92459"
rtms_gray <- "#808080"

rt_gray <- rtms_gray
rt_red <- kaimana_red
rt_blue <- kaimana_light_blue
rt_green <- kaimana_med_green
rt_orange <- coastside_orange
rt_teal <- rtemis_teal
rt_purple <- rtemis_purple
rt_magenta <- rtemis_magenta_light

# %% rtemis colors ----
highlight_col <- rt_orange
col_object <- rt_gray # object name in repr_S7name
col_info <- highlight2_col <- lmd_burgundy
col_outer <- rt_green
col_tuner <- rt_blue
col_calibrator <- rt_magenta


#' rtemis Color System
#'
#' A named list of colors used consistently across all packages
#' in the rtemis ecosystem.
#'
#' Colors are provided as hex strings.
#'
#' @format A named list with the following elements:
#' \describe{
#'   \item{red}{"kaimana red"}
#'   \item{blue}{"kaimana light blue"}
#'   \item{green}{"kaimana medium green"}
#'   \item{orange}{"coastside orange"}
#'   \item{teal}{"rtemis teal"}
#'   \item{purple}{"rtemis purple"}
#'   \item{magenta}{"rtemis magenta"}
#'   \item{highlight_col}{"highlight color"}
#'   \item{object}{"rtemis teal"}
#'   \item{info}{"lmd burgundy"}
#'   \item{outer}{"kaimana red"}
#'   \item{tuner}{"coastside orange"}
#' }
#'
#' @examples
#' rtemis_colors[["orange"]]
#'
#' @author EDG
#'
#' @export
#'
#' @examples
#' rtemis_colors[["teal"]]
rtemis_colors <- list(
  red = rt_red,
  blue = rt_blue,
  green = rt_green,
  orange = rt_orange,
  teal = rt_teal,
  purple = rt_purple,
  magenta = rt_magenta,
  highlight_col = highlight_col,
  object = col_object,
  info = col_info,
  outer = col_outer,
  tuner = col_tuner
) # /rtemis.utils::rtemis_colors


================================================
FILE: R/theme.R
================================================
# theme.R
# ::rtemis::
# EDG rtemis.org

# %% Black ----
#' Themes for `draw_*` functions
#'
#' @param bg Color: Figure background.
#' @param plot_bg Color: Plot region background.
#' @param fg Color: Foreground color used as default for multiple elements like
#' axes and labels, which can be defined separately.
#' @param pch Integer: Point character.
#' @param cex Float: Character expansion factor.
#' @param lwd Float: Line width.
#' @param bty Character: Box type:  "o", "l", "7", "c", "u", or "]", or "n".
#' @param box_col Box color if `bty != "n"`.
#' @param box_alpha Float: Box alpha.
#' @param box_lty Integer: Box line type.
#' @param box_lwd Float: Box line width.
#' @param grid Logical: If TRUE, draw grid in plot regions.
#' @param grid_nx Integer: N of vertical grid lines.
#' @param grid_ny Integer: N of horizontal grid lines.
#' @param grid_col Grid color.
#' @param grid_alpha Float: Grid alpha.
#' @param grid_lty Integer: Grid line type.
#' @param grid_lwd Float: Grid line width.
#' @param axes_visible Logical: If TRUE, draw axes.
#' @param axes_col Axes colors.
#' @param tick_col Tick color.
#' @param tick_alpha Float: Tick alpha.
#' @param tick_labels_col Tick labels' color.
#' @param tck `graphics::parr`'s tck argument: Tick length, can be negative.
#' @param tcl `graphics::parr`'s tcl argument.
#' @param x_axis_side Integer: Side to place x-axis.
#' @param y_axis_side Integer: Side to place y-axis.
#' @param labs_col Labels' color.
#' @param x_axis_line Numeric: `graphics::axis`'s `line` argument for the x-axis.
#' @param x_axis_las Numeric: `graphics::axis`'s `las` argument for the x-axis.
#' @param x_axis_padj Numeric: x-axis' `padj`: Adjustment for the x-axis
#' tick labels' position.
#' @param x_axis_hadj Numeric: x-axis' `hadj`.
#' @param y_axis_line Numeric: `graphics::axis`'s `line` argument for the y-axis.
#' @param y_axis_las Numeric: `graphics::axis`'s `las` argument for the y-axis.
#' @param y_axis_padj Numeric: y-axis' `padj`.
#' @param y_axis_hadj Numeric: y-axis' `hadj`.
#' @param xlab_line Numeric: Line to place `xlab`.
#' @param ylab_line Numeric: Line to place `ylab`.
#' @param zerolines Logical: If TRUE, draw lines on x = 0, y = 0, if within
#' plot limits.
#' @param zerolines_col Zerolines color.
#' @param zerolines_alpha Float: Zerolines alpha.
#' @param zerolines_lty Integer: Zerolines line type.
#' @param zerolines_lwd Float: Zerolines line width.
#' @param main_line Float: How many lines away from the plot region to draw
#' title.
#' @param main_adj Float: How to align title.
#' @param main_font Integer: 1: Regular, 2: Bold.
#' @param main_col Title color.
#' @param font_family Character: Font to be used throughout plot.
#'
#' @return `Theme` object.
#'
#' @rdname theme
#' @export
#'
#' @examples
#' theme <- theme_black(font_family = "Geist")
#' theme
theme_black <- function(
  bg = "#000000",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = .2,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = .5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "black",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_black


#' @rdname theme
#' @export
theme_blackgrid <- function(
  bg = "#000000",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = .2,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "blackgrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_blackgrid


#' @rdname theme
#' @export
theme_blackigrid <- function(
  bg = "#000000",
  plot_bg = "#1A1A1A",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "blackigrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_darkgrid


#' @rdname theme
#' @export
theme_darkgray <- function(
  bg = "#121212",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = .2,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = .5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "darkgray",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_darkgray


#' @rdname theme
#' @export
theme_darkgraygrid <- function(
  bg = "#121212",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#404040",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "darkgraygrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_darkgraygrid


#' @rdname theme
#' @export
theme_darkgrayigrid <- function(
  bg = "#121212",
  plot_bg = "#202020",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "transparent",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "darkgrayigrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_darkgrayigrid


# %% White ----
#' @rdname theme
#' @export
theme_white <- function(
  bg = "#ffffff",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = .5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "white",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_white

#' @rdname theme
#' @export
theme_whitegrid <- function(
  bg = "#ffffff",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#c0c0c0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "whitegrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_whitegrid

#' @rdname theme
#' @export
theme_whiteigrid <- function(
  bg = "#ffffff",
  plot_bg = "#E6E6E6",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "transparent",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "whiteigrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_whiteigrid


# %% Gray ----
#' @rdname theme
#' @export
theme_lightgraygrid <- function(
  bg = "#dfdfdf",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#c0c0c0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "lightgraygrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_lightgray


#' @rdname theme
#' @export
theme_mediumgraygrid <- function(
  bg = "#b3b3b3",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  # box --
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = .5,
  # grid --
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#d0d0d0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  # axes --
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = .5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = .5,
  y_axis_hadj = .5,
  xlab_line = 1.4,
  ylab_line = 2,
  # zerolines --
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = .5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  # title --
  main_line = .25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
) {
  Theme(
    name = "mediumgraygrid",
    config = list(
      bg = bg,
      plot_bg = plot_bg,
      fg = fg,
      pch = pch,
      cex = cex,
      lwd = lwd,
      # box --
      bty = bty,
      box_col = box_col,
      box_alpha = box_alpha,
      box_lty = box_lty,
      box_lwd = box_lwd,
      # grid --
      grid = grid,
      grid_nx = grid_nx,
      grid_ny = grid_ny,
      grid_col = grid_col,
      grid_alpha = grid_alpha,
      grid_lty = grid_lty,
      grid_lwd = grid_lwd,
      # axes --
      axes_visible = axes_visible,
      axes_col = axes_col,
      tick_col = tick_col,
      tick_alpha = tick_alpha,
      tick_labels_col = tick_labels_col,
      tck = tck,
      tcl = tcl,
      x_axis_side = x_axis_side,
      y_axis_side = y_axis_side,
      labs_col = labs_col,
      x_axis_line = x_axis_line,
      x_axis_las = x_axis_las,
      x_axis_padj = x_axis_padj,
      x_axis_hadj = x_axis_hadj,
      y_axis_line = y_axis_line,
      y_axis_las = y_axis_las,
      y_axis_padj = y_axis_padj,
      y_axis_hadj = y_axis_hadj,
      xlab_line = xlab_line,
      ylab_line = ylab_line,
      # zerolines --
      zerolines = zerolines,
      zerolines_col = zerolines_col,
      zerolines_alpha = zerolines_alpha,
      zerolines_lty = zerolines_lty,
      zerolines_lwd = zerolines_lwd,
      # title --
      main_line = main_line,
      main_adj = main_adj,
      main_font = main_font,
      main_col = main_col,
      font_family = font_family
    )
  )
} # /rtemis::theme_mediumdgray


#' Print available \pkg{rtemis} themes
#'
#' @return Called for its side effect of printing available themes.
#'
#' @author EDG
#' @export
#'
#' @examples
#' available_themes()
available_themes <- function() {
  cat(highlight("  Available themes:\n"))
  cat('    "white", "whitegrid", "whiteigrid,\n')
  cat('    "black", "blackgrid", "blackigrid",\n')
  cat('    "darkgray", "darkgraygrid", "darkgrayigrid",\n')
  cat('    "lightgraygrid", "mediumgraygrid"\n')
  invisible()
}


# %% choose_theme ----
#' Select an rtemis theme
#'
#' @details
#' If `x` is not defined, `choose_theme()` will use `getOption("rtemis_theme", "whitegrid")` to
#' select the theme. This allows users to set a default theme for all rtemis plots by setting
#' `options(rtemis_theme = "theme_name")` at any point.
#'
#' @param x Character: Name of theme to select. If not defined, will use `getOption("rtemis_theme", "whitegrid")`.
#' @param override Optional List: Theme parameters to override defaults.
#'
#' @return `Theme` object.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # Get default theme set by options(rtemis_theme = "theme_name").
#' # If not set, defaults to "whitegrid":
#' choose_theme()
#' # Get darkgraygrid theme. Same as `theme_darkgraygrid()`:
#' choose_theme("darkgraygrid")
#' # This will use the default theme, and override the foreground color to red:
#' choose_theme(override = list(fg = "#ff0000"))
choose_theme <- function(
  x = c(
    "white",
    "whitegrid",
    "whiteigrid",
    "black",
    "blackgrid",
    "blackigrid",
    "darkgray",
    "darkgraygrid",
    "darkgrayigrid",
    "lightgraygrid",
    "mediumgraygrid"
  ),
  override = NULL
) {
  if (length(x) > 1) {
    x <- getOption("rtemis_theme", "whitegrid")
  }
  if (is.null(override)) {
    override <- list()
  }

  do_call(paste0("theme_", x), override)
} # /rtemis::theme


================================================
FILE: R/train.R
================================================
# train.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train ----
#' Train Supervised Learning Models
#'
#' @description
#' Preprocess, tune, train, and test supervised learning models using nested resampling in a single
#' call.
#'
#' @param x Tabular data, i.e. data.frame, data.table, or tbl_df (tibble): Training set data.
#' @param dat_validation Tabular data: Validation set data.
#' @param dat_test Tabular data: Test set data.
#' @param weights Optional vector of case weights.
#' @param algorithm Character: Algorithm to use. Can be left NULL, if `hyperparameters` is defined.
#' @param preprocessor_config Optional PreprocessorConfig object: Setup using [setup_Preprocessor].
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param tuner_config TunerConfig object: Setup using [setup_GridSearch].
#' @param outer_resampling_config Optional ResamplerConfig object: Setup using [setup_Resampler].
#' This defines the outer resampling method, i.e. the splitting into training and test sets for the
#' purpose of assessing model performance. If NULL, no outer resampling is performed, in which case
#' you might want to use a `dat_test` dataset to assess model performance on a single test set.
#' @param execution_config `ExecutionConfig` object: Setup using [setup_ExecutionConfig]. This
#' allows you to set backend ("future", "mirai", or "none"), number of workers, and future plan if
#' using `backend = "future"`.
#' @param question Optional character string defining the question that the model is trying to
#' answer.
#' @param outdir Character, optional: String defining the output directory.
#' @param verbosity Integer: Verbosity level.
#' @param ... Not used.
#'
#' @details
#' **Online book & documentation**
#'
#' See [docs.rtemis.org/r](https://docs.rtemis.org/r/) for detailed documentation.
#'
#' **Preprocessing**
#'
#' There are many different stages at which preprocessing could be applied, when running a
#' supervised learning pipeline with nested resampling. Some operations are best done before
#' passing data to `train()`:
#'
#' - Duplicate rows should be removed before resampling, so that duplicates don't end up in
#' different resamples, e.g. one in training and one in test.
#' - Constant columns should be removed before resampling. A column may appear constant in a small
#' resample, even if it is not constant in the full dataset. Removing it inconsistently will
#' throw an error during prediction.
#' - All data-dependent preprocessing steps need to be performed on training data only and applied
#' on validation and test data, e.g. scaling, centering, imputation.
#'
#' User-defined preprocessing through `preprocessor_config` is applied on training set data,
#' the learned parameters are stored in the returned Supervised or SupervisedRes object, and the
#' preprocessing is applied on validation and test data.
#'
#' **Binary Classification**
#'
#' For binary classification, the outcome should be a factor where *the 2nd level
#' corresponds to the positive class*.
#'
#' **Resampling**
#'
#' Note that you should not use an outer resampling method with
#' replacement if you will also be using an inner resampling (for tuning).
#' The duplicated cases from the outer resampling may appear both in the
#' training and test sets of the inner resamples, leading to underestimated
#' test error.
#'
#' **Reproducibility**
#'
#' If using ***outer resampling***, you can set a seed when defining `outer_resampling_config`, e.g.
#' ```r
#' outer_resampling_config = setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L)
#' ```
#' If using ***tuning with inner resampling***, you can set a seed when defining `tuner_config`,
#' e.g.
#' ```r
#' tuner_config = setup_GridSearch(
#'   resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold", seed = 2027L)
#' )
#' ```
#'
#' **Parallelization**
#'
#' There are three levels of parallelization that may be used during training:
#'
#' 1. Algorithm training (e.g. a parallelized learner like LightGBM)
#' 2. Tuning (inner resampling, where multiple resamples can be processed in parallel)
#' 3. Outer resampling (where multiple outer resamples can be processed in parallel)
#'
#' The `train()` function will automatically manage parallelization depending
#' on:
#' - The number of workers specified by the user using `n_workers`
#' - Whether the training algorithm supports parallelization itself
#' - Whether hyperparameter tuning is needed
#'
#' @return Object of class `Regression(Supervised)`, `RegressionRes(SupervisedRes)`,
#' `Classification(Supervised)`, or `ClassificationRes(SupervisedRes)`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' \donttest{
#' iris_c_lightRF <- train(
#'    iris,
#'    algorithm = "LightRF",
#'    outer_resampling_config = setup_Resampler(),
#' )
#' }
train <- function(
  x,
  dat_validation = NULL,
  dat_test = NULL,
  weights = NULL,
  algorithm = NULL,
  preprocessor_config = NULL, # PreprocessorConfig
  hyperparameters = NULL, # Hyperparameters
  tuner_config = NULL, # TunerConfig
  outer_resampling_config = NULL, # ResamplerConfig
  execution_config = setup_ExecutionConfig(), # ExecutionConfig
  question = NULL,
  outdir = NULL,
  verbosity = 1L,
  ...
) {
  # SuperConfigLive dispatch ----
  if (S7_inherits(x, SuperConfigLive)) {
    return(train(
      x = x@dat_training,
      dat_validation = x@dat_validation,
      dat_test = x@dat_test,
      weights = x@weights,
      preprocessor_config = x@preprocessor_config,
      algorithm = x@algorithm,
      hyperparameters = x@hyperparameters,
      tuner_config = x@tuner_config,
      outer_resampling_config = x@outer_resampling_config,
      execution_config = x@execution_config,
      question = x@question,
      outdir = x@outdir,
      verbosity = x@verbosity
    ))
  } # / train.SuperConfigLive

  # SuperConfig dispatch ----
  if (S7_inherits(x, SuperConfig)) {
    dat_training <- read(x@dat_training_path, character2factor = TRUE)
    dat_validation <- if (!is.null(x@dat_validation_path)) {
      read(x@dat_validation_path)
    } else {
      NULL
    }
    dat_test <- if (!is.null(x@dat_test_path)) {
      read(x@dat_test_path)
    } else {
      NULL
    }
    # Call train() with data and other parameters from config
    return(train(
      x = dat_training,
      dat_validation = dat_validation,
      dat_test = dat_test,
      weights = x@weights,
      preprocessor_config = x@preprocessor_config,
      algorithm = x@algorithm,
      hyperparameters = x@hyperparameters,
      tuner_config = x@tuner_config,
      outer_resampling_config = x@outer_resampling_config,
      execution_config = x@execution_config,
      question = x@question,
      outdir = x@outdir,
      verbosity = x@verbosity
    ))
  } # / train.SuperConfig

  # Checks ----
  if (is.null(hyperparameters) && is.null(algorithm)) {
    cli::cli_abort(
      "You must define either {.arg hyperparameters} or {.arg algorithm}."
    )
  }

  extra_args <- list(...)
  if (length(extra_args) > 0L) {
    cli::cli_abort(
      "Unused extra arguments were provided: {.val {names(extra_args)}}. Please check your function call."
    )
  }

  if (is.null(algorithm) && !is.null(hyperparameters)) {
    algorithm <- hyperparameters@algorithm
  }

  type <- supervised_type(x)
  ncols <- ncol(x)

  if (is.null(hyperparameters) && !is.null(algorithm)) {
    hyperparameters <- get_default_hyperparameters(
      algorithm,
      type = type,
      ncols = ncols
    )
  }

  if (
    !is.null(algorithm) &&
      tolower(algorithm) != tolower(hyperparameters@algorithm)
  ) {
    cli::cli_abort(
      "You defined algorithm to be '{algorithm}', but defined hyperparameters for {hyperparameters@algorithm}."
    )
  }

  check_is_S7(hyperparameters, Hyperparameters)

  # Set default tuner_config if tuning is needed but none specified
  if (needs_tuning(hyperparameters) && is.null(tuner_config)) {
    tuner_config <- setup_GridSearch()
  }

  if (!is.null(tuner_config)) {
    check_is_S7(tuner_config, TunerConfig)
  }

  if (!is.null(preprocessor_config)) {
    check_is_S7(preprocessor_config, PreprocessorConfig)
  }

  # execution_config must always be set
  check_is_S7(execution_config, ExecutionConfig)
  # Override parallelization parameters with those from execution_config
  backend <- execution_config@backend
  n_workers <- execution_config@n_workers
  future_plan <- execution_config@future_plan

  # If outer_resampling_config is set, dat_validation and dat_test must be NULL
  if (!is.null(outer_resampling_config)) {
    if (!is.null(dat_validation) || !is.null(dat_test)) {
      cli::cli_abort(
        "If outer_resampling_config is set, {.arg dat_validation} and {.arg dat_test} must be NULL."
      )
    }
  }

  if (backend == "future" && future_plan == "mirai_multisession") {
    future_plan <- "future.mirai::mirai_multisession"
  }
  if (!is.null(outer_resampling_config)) {
    check_is_S7(outer_resampling_config, ResamplerConfig)
    if (!is.null(outer_resampling_config[["id_strat"]])) {
      stopifnot(length(outer_resampling_config[["id_strat"]]) == NROW(x))
    }
  }

  algorithm <- get_alg_name(algorithm)
  if (!is.null(outdir)) {
    outdir <- make_path(outdir)
    if (!dir.exists(outdir)) {
      dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
    }
    if (verbosity > 1L) {
      msg_info("Output directory set to ", outdir, ".")
    }
  }

  logfile <- if (!is.null(outdir)) {
    paste0(
      outdir,
      "/",
      "train_",
      algorithm,
      "_",
      format(Sys.time(), "%Y%m%d.%H%M%S"),
      ".log"
    )
  } else {
    NULL
  }

  # Start timer & logfile ----
  start_time <- intro(verbosity = verbosity, logfile = logfile)

  # Data ----
  if (type == "Classification") {
    classes <- levels(outcome(x))
  }

  ## Print data summary ----
  if (verbosity > 0L) {
    summarize_supervised(
      x = x,
      dat_validation = dat_validation,
      dat_test = dat_test
    )
  }

  # Init ----
  workers <- get_n_workers(
    algorithm = algorithm,
    hyperparameters = hyperparameters,
    outer_resampling_config = outer_resampling_config,
    n_workers = n_workers,
    verbosity = verbosity
  )
  hyperparameters@n_workers <- workers[["algorithm"]]
  tuner <- NULL

  # Set backend to "none" if workers[["tuning"]] == 1L
  backend <- if (workers[["tuning"]] == 1L) {
    "none"
  } else {
    backend
  }

  # Preprocessors ----
  # `preprocessor`: User-level preprocessing (Preprocessor object created from
  #   `setup_Preprocessor`). Handles scaling, imputation, encoding, etc.
  # `preprocessor_internal`: Algorithm-level preprocessing (Preprocessor object
  #   returned by each train_*() method). Handles transformations the algorithm
  #   requires internally (e.g. factor-to-integer conversion for LightGBM).
  # Both are stored on the trained model so predict() can re-apply them in order:
  # user-level first, then algorithm-level.
  # Initialized to NULL here; set in the single-model path below.
  # In the outer resampling path, each sub-model carries its own pair.
  preprocessor <- preprocessor_internal <- NULL

  # === Outer Resampling ===
  # Splits data into multiple training-test folds and calls train() recursively
  # on each. Each recursive call enters the Single Model path below (which may
  # itself tune via inner resampling). After all folds complete, execution falls
  # through to the Outer Aggregation path.
  if (!is.null(outer_resampling_config)) {
    msg0(
      fmt("<> ", col = col_outer, bold = TRUE),
      "Training ",
      highlight(paste(algorithm, type)),
      " using ",
      desc(outer_resampling_config),
      "...",
      verbosity = verbosity
    )
    outer_resampler <- resample(
      x,
      config = outer_resampling_config,
      verbosity = verbosity
    )
    models <- lapply(
      cli::cli_progress_along(
        seq_len(outer_resampler@config@n),
        name = "Training outer resamples...",
        type = "tasks"
      ),
      function(i) {
        train(
          x = x[outer_resampler[[i]], ],
          dat_test = x[-outer_resampler[[i]], ],
          algorithm = algorithm,
          preprocessor_config = preprocessor_config,
          hyperparameters = hyperparameters,
          tuner_config = tuner_config,
          outer_resampling_config = NULL, # This model is one of the outer resamples.
          execution_config = execution_config,
          weights = if (!is.null(weights)) {
            weights[outer_resampler[[i]]]
          } else {
            NULL
          },
          question = question,
          verbosity = verbosity - 1L
        )
      }
    )
    names(models) <- names(outer_resampler@resamples)
    hyperparameters@resampled <- 1L
    msg(
      fmt("</>", col = col_outer, bold = TRUE),
      "Outer resampling done.",
      verbosity = verbosity
    )
  } # /Outer Resampling

  if (hyperparameters@resampled == 0L) {
    # === Inner path ===
    # Trains one model: optionally tune (inner resampling) → preprocess →
    # train algorithm → predict → returns Supervised.
    # Skipped when outer resampling was performed (resampled == 1L).

    # Tune ----
    # Inner resampling for hyperparameter optimization.
    if (needs_tuning(hyperparameters)) {
      tuner <- tune(
        x = x,
        hyperparameters = hyperparameters,
        tuner_config = tuner_config,
        preprocessor_config = preprocessor_config,
        weights = weights,
        backend = backend,
        future_plan = future_plan,
        n_workers = workers[["tuning"]],
        verbosity = verbosity
      )
      # Update hyperparameters
      hyperparameters <- update(
        hyperparameters,
        tuner@best_hyperparameters,
        tuned = 1L
      )
    } # /Tune

    # User-level preprocessing ----
    if (!is.null(preprocessor_config)) {
      preprocessor <- preprocess(
        x = x,
        config = preprocessor_config,
        dat_validation = dat_validation,
        dat_test = dat_test
      )
      x <- if (is.null(dat_validation) && is.null(dat_test)) {
        preprocessor@preprocessed
      } else {
        preprocessor@preprocessed[["training"]]
      }
      if (!is.null(dat_validation)) {
        dat_validation <- preprocessor@preprocessed[["validation"]]
      }
      if (!is.null(dat_test)) dat_test <- preprocessor@preprocessed[["test"]]
    } else {
      preprocessor <- NULL
    } # /User-level preprocessing

    # IFW ----
    # Weight calculation must follow preprocessing since N cases may change.
    if (type == "Classification" && hyperparameters[["ifw"]]) {
      if (!is.null(weights)) {
        cli::cli_abort("Custom weights are defined, but IFW is set to TRUE.")
      } else {
        weights <- ifw(x[[ncols]], type = "case_weights", verbosity = verbosity)
      }
    } # /IFW

    # Train algorithm ----
    if (is_tuned(hyperparameters)) {
      msg(
        "Training",
        highlight(paste(algorithm, type)),
        "with tuned hyperparameters...",
        verbosity = verbosity
      )
    } else {
      msg0(
        "Training ",
        highlight(paste(algorithm, type)),
        "...",
        verbosity = verbosity
      )
    }
    # Validation data is only passed to learners that use early stopping.
    # For other learners, validation metrics are collected during tuning.
    dat_validation_for_training <- if (algorithm %in% early_stopping_algs) {
      dat_validation
    } else {
      NULL
    }

    trained <- train_(
      hyperparameters = hyperparameters,
      x = x,
      weights = weights,
      dat_validation = dat_validation_for_training,
      execution_config = execution_config, # used by LightRuleFit
      verbosity = verbosity
    )

    model <- trained[["model"]]
    # Algorithm-level preprocessing (e.g. factor-to-integer for LightGBM),
    # returned by train_*() if needed.
    preprocessor_internal <- trained[["preprocessor"]]

    # Predictions ----
    predicted_prob_training <- predicted_prob_validation <- predicted_prob_test <- NULL

    # Re-apply algorithm-level preprocessing before predicting on each dataset.
    x_features <- features(x)
    if (!is.null(preprocessor_internal)) {
      x_features <- preprocess(
        x_features,
        preprocessor_internal,
        verbosity = 0L
      ) |>
        preprocessed()
    }

    predicted_training <- predict_super(
      model = model,
      newdata = x_features,
      type = type
    )

    if (type == "Classification") {
      predicted_prob_training <- predicted_training
      predicted_training <- prob2categorical(
        predicted_prob_training,
        levels = classes
      )
    }

    predicted_validation <- predicted_test <- NULL
    if (!is.null(dat_validation)) {
      dat_validation_features <- features(dat_validation)
      if (!is.null(preprocessor_internal)) {
        dat_validation_features <- preprocess(
          dat_validation_features,
          preprocessor_internal,
          verbosity = 0L
        ) |>
          preprocessed()
      }

      predicted_validation <- predict_super(
        model = model,
        newdata = dat_validation_features,
        type = type
      )

      if (type == "Classification") {
        predicted_prob_validation <- predicted_validation
        predicted_validation <- prob2categorical(
          predicted_prob_validation,
          levels = classes
        )
      }
    }

    if (!is.null(dat_test)) {
      dat_test_features <- features(dat_test)
      if (!is.null(preprocessor_internal)) {
        dat_test_features <- preprocess(
          dat_test_features,
          preprocessor_internal,
          verbosity = 0L
        ) |>
          preprocessed()
      }

      predicted_test <- predict_super(
        model = model,
        newdata = dat_test_features,
        type = type
      )

      if (type == "Classification") {
        predicted_prob_test <- predicted_test
        predicted_test <- prob2categorical(
          predicted_prob_test,
          levels = classes
        )
      }
    }

    # Standard Errors ----
    # Use the same (algorithm-level preprocessed) features as predictions.
    se_training <- se_validation <- se_test <- NULL
    if (type == "Regression" && algorithm %in% se_compat_algorithms) {
      se_training <- se_super(model = model, newdata = x_features)
      if (!is.null(dat_validation)) {
        se_validation <- se_super(
          model = model,
          newdata = dat_validation_features
        )
      }
      if (!is.null(dat_test)) {
        se_test <- se_super(model = model, newdata = dat_test_features)
      }
    }

    # Return Supervised ----
    mod <- make_Supervised(
      algorithm = algorithm,
      model = model,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner = tuner,
      execution_config = execution_config,
      y_training = x[[ncols]],
      y_validation = if (!is.null(dat_validation)) dat_validation[[ncols]],
      y_test = if (!is.null(dat_test)) dat_test[[ncols]],
      predicted_training = predicted_training,
      predicted_validation = predicted_validation,
      predicted_test = predicted_test,
      predicted_prob_training = predicted_prob_training,
      predicted_prob_validation = predicted_prob_validation,
      predicted_prob_test = predicted_prob_test,
      se_training = se_training,
      se_validation = se_validation,
      se_test = se_test,
      xnames = names(x)[-ncols],
      varimp = varimp_super(model = model),
      question = question
    )
  } else {
    # === Outer Aggregation path ===
    # Reached after outer resampling. Each sub-model (Supervised) in `models`
    # carries its own preprocessor pair. Aggregate results → SupervisedRes.
    y_training <- lapply(models, function(mod) mod@y_training)
    y_test <- lapply(models, function(mod) mod@y_test)
    predicted_training <- lapply(models, function(mod) mod@predicted_training)
    predicted_test <- lapply(models, function(mod) mod@predicted_test)
    if (type == "Classification") {
      predicted_prob_training <- lapply(
        models,
        function(mod) mod@predicted_prob_training
      )
      predicted_prob_test <- lapply(
        models,
        function(mod) mod@predicted_prob_test
      )
    } else {
      predicted_prob_training <- predicted_prob_test <- NULL
    }
    # Return SupervisedRes ----
    mod <- make_SupervisedRes(
      algorithm = algorithm,
      type = type,
      models = models,
      preprocessor = preprocessor,
      preprocessor_internal = preprocessor_internal,
      hyperparameters = hyperparameters,
      tuner_config = tuner_config,
      outer_resampler = outer_resampler,
      execution_config = execution_config,
      y_training = y_training,
      y_test = y_test,
      predicted_training = predicted_training,
      predicted_test = predicted_test,
      predicted_prob_training = predicted_prob_training,
      predicted_prob_test = predicted_prob_test,
      xnames = names(x)[-ncols],
      varimp = lapply(models, \(mod) mod@varimp),
      question = question
    )
  }

  # Outro ----
  if (verbosity > 0L) {
    message()
    print(mod)
    message()
  }
  if (!is.null(outdir)) {
    rt_save(mod, outdir = outdir, file_prefix = paste0("train_", algorithm))
  }
  outro(
    start_time,
    logfile = logfile,
    verbosity = verbosity
  )
  # Print object to logfile
  if (!is.null(logfile)) {
    cat(
      "\n",
      repr(mod, output_type = "plain"),
      file = logfile,
      append = TRUE,
      sep = ""
    )
  }
  mod
} # /rtemis::train


# %% get_n_workers ----
# Function to assign number of workers to algorithm, tuning, or outer resampling
# based on whether algorithm is parallelized, tuning is needed, and outer resampling is set.

#' Get Number of Workers
#'
#' Distribute workers across different parallelization levels: algorithm training,
#' tuning (inner resampling), and outer resampling. Assigns workers to the innermost
#' available parallelization level to avoid over-subscription.
#'
#' @param algorithm Character: Algorithm name.
#' @param hyperparameters `Hyperparameters` object: Setup using one of `setup_*` functions.
#' @param outer_resampling_config Optional ResamplerConfig object: Setup using [setup_Resampler].
#' @param n_workers Integer: Total number of workers you want to use.
#' @param verbosity Integer: Verbosity level.
#'
#' @details
#' The function prioritizes parallelization levels as follows:
#' 1. If algorithm is parallelized (e.g., LightGBM, Ranger): all workers go to algorithm
#' 2. Else if tuning is needed: all workers go to tuning (inner resampling)
#' 3. Else if outer resampling is set: all workers go to outer resampling
#' 4. Else: sequential execution (1 worker each)
#'
#' @return Named list with the number of workers for each level:
#' - `algorithm`: Number of workers for algorithm training.
#' - `tuning`: Number of workers for tuning (if applicable).
#' - `outer_resampling_config`: Number of workers for outer resampling (if applicable).
#'
#' @keywords internal
#' @noRd
get_n_workers <- function(
  algorithm,
  hyperparameters,
  outer_resampling_config,
  n_workers,
  verbosity = 1L
) {
  # Input validation
  stopifnot(
    is.character(algorithm),
    length(algorithm) == 1L,
    is.numeric(n_workers),
    n_workers >= 1L,
    n_workers == as.integer(n_workers)
  )

  # Check parallelization conditions
  is_parallelized <- algorithm %in% live[["parallelized_learners"]]
  requires_tuning <- needs_tuning(hyperparameters)
  requires_resampling <- !is.null(outer_resampling_config)

  # Assign workers to innermost parallelization level to avoid over-subscription
  if (is_parallelized) {
    # Parallelized algorithms get all workers, disable other parallelization
    workers_algorithm <- n_workers
    workers_tuning <- 1L
    workers_outer_resampling <- 1L
    if (verbosity > 1L && (requires_tuning || requires_resampling)) {
      msg(
        bold(algorithm),
        "is parallelized. Disabling tuning parallelization."
      )
    }
  } else if (requires_tuning) {
    # Tuning gets all workers if algorithm is not parallelized
    workers_algorithm <- 1L
    workers_tuning <- n_workers
    workers_outer_resampling <- 1L
    if (requires_resampling) {
      msg(
        "Tuning parallelization enabled.",
        verbosity = verbosity
      )
    }
  } else if (requires_resampling) {
    # Outer resampling gets all workers if no tuning needed
    workers_algorithm <- 1L
    workers_tuning <- 1L
    workers_outer_resampling <- n_workers
  } else {
    # Sequential execution
    workers_algorithm <- 1L
    workers_tuning <- 1L
    workers_outer_resampling <- 1L
  }

  msg0(
    bold("//"),
    " Max workers: ",
    highlight(n_workers),
    " => ",
    "Algorithm: ",
    highlight(workers_algorithm),
    "; Tuning: ",
    highlight(workers_tuning),
    "; Outer Resampling: ",
    highlight(workers_outer_resampling),
    verbosity = verbosity
  )

  list(
    algorithm = workers_algorithm,
    tuning = workers_tuning,
    outer_resampling = workers_outer_resampling
  )
} # /rtemis::get_n_workers


================================================
FILE: R/train_CART.R
================================================
# train_CART.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train_.CARTHyperparameters ----
#' Train a CART decision tree
#'
#' Train a CART decision tree using `rpart`.
#'
#' CART does not need any special preprocessing.
#' It works with numeric and factor variables and handles missing values.
#' The "train_*" functions train a single model.
#' Use [train] for tuning and test using nested cross-validation.
#'
#' @param hyperparameters `CARTHyperparameters` object: make using [setup_CART].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Not used for CART.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, CARTHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("rpart")

  # Arguments ----
  # Hyperparameters must be either untunable or frozen by `train`
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  if (is.null(weights)) {
    weights <- rep(1, NROW(x))
  }

  # Train ----
  # weights can't be NULL.
  # !If formula is character, the input to weights must be the unquoted column name in the data.frame
  # that contains weights, e.g. by doing cbind(x, weights = weights)
  model <- rpart::rpart(
    as.formula(make_formula(x)),
    data = x,
    weights = weights,
    control = rpart::rpart.control(
      minsplit = hyperparameters[["minsplit"]],
      minbucket = hyperparameters[["minbucket"]],
      cp = hyperparameters[["cp"]],
      maxcompete = hyperparameters[["maxcompete"]],
      maxsurrogate = hyperparameters[["maxsurrogate"]],
      usesurrogate = hyperparameters[["usesurrogate"]],
      surrogatestyle = hyperparameters[["surrogatestyle"]],
      maxdepth = hyperparameters[["maxdepth"]],
      xval = hyperparameters[["xval"]]
    )
  )

  # Cost-Complexity Pruning ----
  if (!is.null(hyperparameters[["prune_cp"]])) {
    model <- rpart::prune(model, cp = hyperparameters[["prune_cp"]])
  }
  check_inherits(model, "rpart")
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.CARTHyperparameters


# %% predict_super.class_rpart ----
#' Predict from rpart model
#'
#' @param model rpart model.
#' @param newdata tabular data: Data to predict on.
#' @param type Character: Type of supervised learning ("Classification" or "Regression").
#'
#' @keywords internal
#' @noRd
method(predict_super, class_rpart) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  if (type == "Classification") {
    # Classification
    # predict.rpart returns a matrix n_cases x n_classes,
    # with classes are ordered the same as factor levels
    predicted_prob <- predict(model, newdata = newdata, type = "prob") # binclasspos = 2L
    if (NCOL(predicted_prob) == 2L) {
      # In binary classification, rpart returns matrix with 2 columns
      predicted_prob <- predicted_prob[, 2L]
    }
    predicted_prob
  } else {
    predict(model, newdata = newdata, type = "vector")
  }
} # /rtemis::predict_super.rpart


# %% varimp_super.class_rpart ----
#' Get variable importance from rpart model
#'
#' @param model rpart model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_rpart) <- function(model) {
  vi <- model[["variable.importance"]]
  VariableImportance(
    data.table(
      variable = names(vi),
      importance = unname(vi)
    )
  )
} # /rtemis::varimp_super.rpart


================================================
FILE: R/train_GAM.R
================================================
# train_GAM.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% train_.GAMHyperparameters ----
#' Train a GAM model
#'
#' Train a GAM model using `GAM`.
#'
#' GAM does not work in the presence of missing values.
#'
#' @param hyperparameters `GAMHyperparameters` object: make using [setup_GAM].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Not used for GAM.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, GAMHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("mgcv")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )

  type <- supervised_type(x)
  n_classes <- if (type == "Classification") {
    nlevels(x[, ncol(x)])
  } else {
    NA
  }

  # Formula ----
  # use s(x, k = k) for all numeric predictors
  index_numeric <- which(sapply(features(x), is.numeric))
  spline_features <- if (length(index_numeric) > 0) {
    paste0(
      "s(",
      colnames(x)[index_numeric],
      ", k = ",
      hyperparameters[["k"]],
      ")",
      collapse = " + "
    )
  } else {
    ""
  }
  index_factor <- which(sapply(features(x), is.factor))
  categorical_features <- if (length(index_factor) > 0) {
    paste0(
      colnames(x)[index_factor],
      collapse = " + "
    )
  } else {
    ""
  }
  formula <- as.formula(
    gsub(
      " \\+ $",
      "",
      paste(
        outcome_name(x),
        "~",
        gsub(
          "^ \\+ ",
          "",
          paste(spline_features, categorical_features, sep = " + ")
        )
      )
    )
  )

  # Train ----
  family <- if (type == "Regression") {
    gaussian()
  } else if (type == "Classification") {
    if (n_classes == 2) {
      binomial()
    } else {
      mgcv::multinom()
    }
  }

  model <- mgcv::gam(
    formula = formula,
    family = family,
    data = x,
    weights = weights
  )
  check_inherits(model, "gam")
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.GAMHyperparameters


# %% predict_super.class_gam ----
#' Predict from GAM model
#'
#' @param model GAM model.
#' @param newdata tabular data: Data to predict on.
#' @param type Character: Type of supervised learning ("Classification" or "Regression").
#'
#' @keywords internal
#' @noRd
method(predict_super, class_gam) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  out <- predict(object = model, newdata = newdata, type = "response")
  if (model[["family"]][["family"]] == "binomial") {
    # mgvc::predict.gam returns an array of 1 dimension that causes errors during type-checking.
    out <- as.numeric(out)
  }
  out
} # /rtemis::predict_super.gam


# %% varimp_super.class_gam ----
#' Get variable importance from GAM model
#'
#' Variable importance for GAM is estimated as the variance of each predictor's partial effect,
#' obtained via predict(model, type = "terms"). This measures each smooth term's contribution to
#' the variance of the fitted values. Values are normalized to sum to one, representing each
#' predictor's proportion of total predicted variance. This approach is computationally efficient
#' (no refitting required) and analogous to importance measures in tree-based methods. It assumes
#' approximate uncorrelatedness of partial effects, which penalized smooths tend to satisfy. For
#' models with high concurvity, consider hierarchical partitioning of R² (e.g. via the gam.hp
#' package) as an alternative.
#'
#' @param model mgcv gam model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_gam) <- function(
  model,
  type = c("partial_effect", "F-test")
) {
  peff <- predict(model, type = "terms")
  vi <- apply(peff, 2, var)
  npeff <- vi / sum(vi) # normalized importance
  VariableImportance(
    data.table(
      variable = names(npeff),
      Partial_Effect_Variance = unname(npeff)
    )
  )
} # /rtemis::varimp_super.gam


# %% se_super.class_gam ----
#' Get Standard Errors from GAM model
#'
#' @param model mgcv gam model.
#' @param newdata tabular data: Data to predict on.
#'
#' @keywords internal
#' @noRd
method(se_super, class_gam) <- function(model, newdata) {
  predict(model, newdata = newdata, se.fit = TRUE)[["se.fit"]]
} # /rtemis::se_super.gam


================================================
FILE: R/train_GLM.R
================================================
# train_GLM.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% train_.GLMHyperparameters ----
#' Train a GLM model
#'
#' Train a GLM model using `stats::glm`.
#'
#' @details
#' `stats::glm` does not work in the presence of missing values.
#' This function uses the formula interface to `glm` to train a GLM model.
#' No preprocessing is needed.
#'
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param hyperparameters `GLMHyperparameters` object: make using [setup_GLM].
#' @param verbosity Integer: If > 0, print messages.
#'
#' @return GLM model.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, GLMHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )

  if (is.null(weights)) {
    weights <- rep(1, NROW(x))
  }

  type <- supervised_type(x)
  if (type == "Classification") {
    n_classes <- nlevels(outcome(x))
    if (n_classes > 2L) {
      cli::cli_abort("GLM does not support multiclass classification")
    }
  } else {
    n_classes <- NA_integer_
  }

  # Formula ----
  formula <- as.formula(
    paste(
      names(x)[ncol(x)],
      "~ ."
    )
  )

  # Train ----
  family <- if (type == "Regression") {
    gaussian()
  } else if (type == "Classification") {
    binomial()
  }
  model <- glm(
    formula = formula,
    family = family,
    data = x,
    weights = weights
  )
  check_inherits(model, "glm")
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.GLMHyperparameters


# %% predict_super.class_glm ----
#' Predict from GLM model
#'
#' @param model GLM model.
#' @param newdata data.frame or similar: Data to predict on.
#'
#' @keywords internal
#' @noRd
method(predict_super, class_glm) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  predict(model, newdata = newdata, type = "response")
} # /rtemis::predict_super.glm


# %% varimp_super.class_glm ----
#' Get coefficients from GLM model
#'
#' @param model GLM model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_glm) <- function(
  model,
  type = c("coefficients", "p-value")
) {
  type <- match.arg(type)
  .coef <- if (type == "coefficients") {
    coef(model)
  } else if (type == "p-value") {
    summary(model)[["coefficients"]][, 4]
  }
  VariableImportance(
    data.table(
      variable = names(.coef),
      Coefficient = unname(.coef)
    )
  )
} # /rtemis::varimp_super.glm


# %% se_super.class_glm ----
#' Get Standard Errors from GLM model
#'
#' @param model GLM model.
#' @param newdata data.frame or similar: Data to predict on.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(se_super, class_glm) <- function(model, newdata) {
  predict(model, newdata = newdata, se.fit = TRUE)[["se.fit"]]
} # /rtemis::se_super.glm


================================================
FILE: R/train_GLMNET.R
================================================
# train_GLMNET.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train_.GLMNETHyperparameters ----
#' Train a GLMNET model
#'
#' Train a GLMNET model using `glmnet`.
#'
#' GLMNET does not work in the presence of missing values.
#'
#' @param hyperparameters `GLMNETHyperparameters` object: make using [setup_GLMNET].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation tabular data: Validation set (unused).
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, GLMNETHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("glmnet")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Convert "null" lambda to NULL
  if (hyperparameters[["lambda"]] == "null") {
    hyperparameters@hyperparameters[["lambda"]] <- NULL
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )

  # weights can't be NULL.
  if (is.null(weights)) {
    weights <- rep(1, NROW(x))
  }
  type <- supervised_type(x)
  n_classes <- if (type == "Classification") {
    nlevels(outcome(x))
  } else {
    NA_integer_
  }
  family <- if (is.null(hyperparameters[["family"]])) {
    if (type == "Regression") {
      "gaussian"
    } else if (type == "Classification") {
      if (n_classes == 2L) {
        "binomial"
      } else {
        "multinomial"
      }
    }
  }

  # Train ----
  # Create xm so that the correct NCOL is used for penalty_factor,
  # since factors are converted to dummy variables.
  xm <- as.matrix(
    model.matrix(~., exc(x, NCOL(x)))[, -1]
  )
  # Check data-specific hyperparameter values
  # penalty_factor must be of length = N features.
  if (is.null(hyperparameters[["penalty_factor"]])) {
    hyperparameters@hyperparameters[["penalty_factor"]] <- rep(1, NCOL(xm))
    if (verbosity > 1L) {
      msg_info("NCOL(xm): ", NCOL(xm))
      msg_info('Updated hyperparameters[["penalty_factor"]] to all 1s.')
    }
  } else {
    if (length(hyperparameters[["penalty_factor"]]) != NCOL(xm)) {
      cli::cli_abort(
        "Length of penalty_factor must be equal to the number of predictors."
      )
    }
  }
  # if lambda is NULL, use cv.glmnet to find optimal lambda
  if (is.null(hyperparameters[["lambda"]])) {
    model <- glmnet::cv.glmnet(
      x = xm,
      y = outcome(x),
      family = family,
      weights = weights,
      offset = hyperparameters[["offset"]],
      alpha = hyperparameters[["alpha"]],
      nlambda = hyperparameters[["nlambda"]],
      standardize = hyperparameters[["standardize"]],
      intercept = hyperparameters[["intercept"]], # can't be NULL
      penalty.factor = hyperparameters[["penalty_factor"]]
    )
    check_inherits(model, "cv.glmnet")
  } else {
    model <- glmnet::glmnet(
      x = xm,
      y = outcome(x),
      family = family,
      weights = weights,
      offset = hyperparameters[["offset"]],
      alpha = hyperparameters[["alpha"]],
      nlambda = hyperparameters[["nlambda"]],
      lambda = hyperparameters[["lambda"]],
      standardize = hyperparameters[["standardize"]],
      intercept = hyperparameters[["intercept"]], # can't be NULL
      penalty.factor = hyperparameters[["penalty_factor"]]
    )
    check_inherits(model, "glmnet")
  }
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.GLMNETHyperparameters

#' Predict from GLMNET model
#'
#' @param model glmnet model.
#' @param newdata data.frame or similar: Data to predict on.
#' @param type Optional character: "Regression" or "Classification". Auto-detected if NULL.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(predict_super, class_glmnet) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  # Determine type
  # if model@classnames exists, type is Classification
  if (is.null(type)) {
    type <- if (!is.null(model[["classnames"]])) {
      "Classification"
    } else {
      "Regression"
    }
  }
  newdata <- as.matrix(
    model.matrix(~., newdata)[, -1, drop = FALSE]
  )
  if (type == "Regression") {
    predict(model, newx = newdata, type = "response")[, 1]
  } else if (type == "Classification") {
    predicted_prob <- predict(model, newx = newdata, type = "response")
    if (NCOL(predicted_prob) == 1) {
      # In binary classification, glmnet returns matrix with 1 column
      # with probabilities of second level.
      predicted_prob <- as.numeric(predicted_prob)
    }
    predicted_prob
  }
} # /rtemis::predict_super.class_glmnet

#' @keywords internal
#' @noRd
method(predict_super, class_cv.glmnet) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  # Determine type
  # if model@classnames exists, type is Classification
  if (is.null(type)) {
    type <- if (!is.null(model[["classnames"]])) {
      "Classification"
    } else {
      "Regression"
    }
  }
  newdata <- as.matrix(
    model.matrix(~., newdata)[, -1, drop = FALSE]
  )
  if (type == "Regression") {
    predict(model, newx = newdata, type = "response")[, 1]
  } else if (type == "Classification") {
    predicted_prob <- predict(model, newx = newdata, type = "response")
    if (NCOL(predicted_prob) == 1) {
      # In binary classification, glmnet returns matrix with 1 column
      # with probabilities of second level.
      predicted_prob <- as.numeric(predicted_prob)
    }
    predicted_prob
  }
} # /rtemis::predict_super.class_cv.glmnet


# %% varimp_super.class_glmnet ----
#' Get coefficients from GLMNET model
#'
#' @param model glmnet model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_glmnet) <- function(model) {
  coefs <- coef(model)

  # In multiclass, coef(model) returns a list of coefficient matrices, one per class.
  # Not yet supported as VariableImportance.
  if (is.list(coefs)) {
    return(NULL)
  }

  if (NCOL(coefs) > 1) {
    msg("GLMNET with multiple sets of coefficients - returning first column.")
  }

  # Exclude intercept
  coefs <- coefs[, 1][-1]
  VariableImportance(
    data.table(
      variable = names(coefs),
      Coefficient = unname(coefs)
    )
  )
} # /rtemis::varimp_super.class_glmnet


# %% varimp_super.class_cv.glmnet ----
#' @keywords internal
#' @noRd
method(varimp_super, class_cv.glmnet) <- function(model) {
  coefs <- coef(model)

  # In multiclass, coef(model) returns a list of coefficient matrices, one per class.
  # Not yet supported as VariableImportance.
  if (is.list(coefs)) {
    return(NULL)
  }

  # Exclude intercept
  coefs <- coefs[, 1][-1]
  VariableImportance(
    data.table(
      variable = names(coefs),
      Coefficient = unname(coefs)
    )
  )
} # /rtemis::varimp_super.class_cv.glmnet


================================================
FILE: R/train_Isotonic.R
================================================
# train_Isotonic.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train_.IsotonicHyperparameters ----
#' Train an Isotonic model
#'
#' @details
#' This is primarily used for calibration of classification models.
#' Binary classification will not work if x and y are not monotonic, i.e. higher values in `x` must
#' correspond to `1`, i.e. positive class in y.
#' outcome `1`.
#'
#' @param hyperparameters `IsotonicHyperparameters` object: make using [setup_Isotonic].
#' @param x tabular data: Training set. Only a single predictor is allowed.
#' @param weights Not used.
#' @param dat_validation Not used.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @return Object of class `stepfun`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, IsotonicHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )
  if (NCOL(x) > 2) {
    cli::cli_abort("Isotonic requires a single predictor.")
  }

  if (!is.null(weights)) {
    cli::cli_abort("Isotonic does not support weights.")
  }

  type <- supervised_type(x)
  if (type == "Classification") {
    n_classes <- nlevels(outcome(x))
    if (n_classes > 2L) {
      cli::cli_abort("Isotonic does not support multiclass classification")
    }
    # Assuming binclasspos = 2L
    y <- as.numeric(x[[2]]) - 1
  } else {
    y <- x[[2]]
    n_classes <- NA_integer_
  }

  # Model ----
  ir <- isoreg(cbind(x[[1]], y))
  model <- as.stepfun(ir)
  check_inherits(model, "stepfun")
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.IsotonicHyperparameters


# %% predict_super.class_stepfun ----
#' Predict from Isotonic model
#'
#' @param model Isotonic model.
#' @param newdata data.frame or similar: Data to predict on.
#' @param type Not used.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(predict_super, class_stepfun) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  model(newdata[[1]])
} # /rtemis::predict_super.class_stepfun


# %% varimp_super.class_stepfun ----
#' Get coefficients from Isotonic model
#'
#' @param model Isotonic model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_stepfun) <- function(model) {
  NULL
} # /rtemis::varimp_super.class_stepfun


================================================
FILE: R/train_LightCART.R
================================================
# train_LightCART.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train_.LightCARTHyperparameters ----
#' Decision Tree using LightGBM
#'
#' @param hyperparameters `LightCARTHyperparameters` object: make using [setup_LightCART].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation data.frame or similar: Validation set (not used for LightCART).
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, LightCARTHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("lightgbm")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  type <- supervised_type(x)
  if (type == "Classification") {
    nclasses <- nlevels(outcome(x))
  } else {
    nclasses <- 1L
  }
  if (is.null(hyperparameters[["objective"]])) {
    hyperparameters@hyperparameters[["objective"]] <- if (
      type == "Regression"
    ) {
      "regression"
    } else {
      if (nclasses == 2L) {
        "binary"
      } else {
        "multiclass"
      }
    }
  }
  ## Preprocess & create lgb.Dataset ----
  lgb_data <- prepare_lgb_data(
    x = x,
    type = type,
    weights = weights,
    verbosity = verbosity
  )
  x <- lgb_data[["train_data"]]
  prp <- lgb_data[["preprocessor"]]

  # Train ----
  params <- hyperparameters@hyperparameters
  params[["ifw"]] <- NULL
  # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification
  if (nclasses > 2L) {
    params[["num_class"]] <- nclasses
  }
  # Set n threads
  params[["num_threads"]] <- 1L

  model <- lightgbm::lgb.train(
    params = params,
    data = x,
    nrounds = 1L,
    valids = list(training = x),
    early_stopping_rounds = NULL,
    verbose = verbosity - 2L
  )
  check_inherits(model, "lgb.Booster")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.LightCARTHyperparameters


================================================
FILE: R/train_LightGBM.R
================================================
# train_LightGBM.R
# ::rtemis::
# 2025 EDG rtemis.org

# LightGBM parameters
# https://lightgbm.readthedocs.io/en/latest/Parameters.html

# %% train_.LightGBMHyperparameters ----
#' Gradient Boosting with LightGBM
#'
#' @param hyperparameters `LightGBMHyperparameters` object: make using [setup_LightGBM].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Validation set for early stopping.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, LightGBMHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("lightgbm")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Convert "null" nrounds to max_nrounds
  if (hyperparameters[["nrounds"]] == "null") {
    hyperparameters@hyperparameters[["nrounds"]] <- hyperparameters[[
      "max_nrounds"
    ]]
  }

  # Data ----
  check_supervised(
    x = x,
    dat_validation = dat_validation,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  type <- supervised_type(x)

  ## Objective ----
  if (type == "Classification") {
    nclasses <- nlevels(outcome(x))
  } else {
    nclasses <- 1L
  }
  if (is.null(hyperparameters[["objective"]])) {
    hyperparameters@hyperparameters[["objective"]] <- if (
      type == "Regression"
    ) {
      "regression"
    } else {
      if (nclasses == 2L) {
        "binary"
      } else {
        "multiclass"
      }
    }
  }

  ## Preprocess & create lgb.Datasets ----
  lgb_data <- prepare_lgb_data(
    x = x,
    dat_validation = dat_validation,
    type = type,
    weights = weights,
    verbosity = verbosity
  )
  x <- lgb_data[["train_data"]]
  dat_validation <- lgb_data[["valid_data"]]
  prp <- lgb_data[["preprocessor"]]

  # Train ----
  params <- hyperparameters@hyperparameters
  params[["nrounds"]] <- params[["max_nrounds"]] <- params[[
    "early_stopping_rounds"
  ]] <- params[["force_nrounds"]] <- params[["ifw"]] <- NULL

  # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification
  if (nclasses > 2L) {
    params[["num_class"]] <- nclasses
  }
  # Set n threads
  params[["num_threads"]] <- prop(hyperparameters, "n_workers")

  model <- lightgbm::lgb.train(
    params = params,
    data = x,
    nrounds = hyperparameters[["nrounds"]],
    valids = if (!is.null(dat_validation)) {
      list(training = x, validation = dat_validation)
    } else {
      list(training = x)
    },
    early_stopping_rounds = hyperparameters[["early_stopping_rounds"]],
    verbose = verbosity - 1L
  )
  check_inherits(model, "lgb.Booster")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.LightGBMHyperparameters


# %% predict_super.class_lgb.Booster ----
#' Predict from LightGBM model
#'
#' @param model lgb.Booster object.
#' @param newdata tabular data: Data to predict on. Will have been preprocessed by
#' `predict.Supervised` before calling this method if algorithm-specific preprocessing was performed during training.
#' @param type Character: Type of supervised learning.
#'
#' @keywords internal
#' @noRd
method(predict_super, class_lgb.Booster) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  check_inherits(model, "lgb.Booster")
  check_inherits(newdata, "data.frame")

  # Algorithm-specific preprocessing (factor2integer) is applied by
  # predict.Supervised before calling this method. See R/train.R and R/07_Supervised.R

  # Predict ----
  predict(model, newdata = as.matrix(newdata))
} # /rtemis::predict_super.lgb.Booster


# %% varimp_super.class_lgb.Booster ----
#' Get variable importance from LightGBM model
#'
#' @param model lgb.Booster object.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_lgb.Booster) <- function(model) {
  check_inherits(model, "lgb.Booster")
  vi <- lightgbm::lgb.importance(model, percentage = TRUE) # -> data.table
  names(vi)[1] <- "variable"
  VariableImportance(vi)
} # /rtemis::varimp_super.lgb.Booster


================================================
FILE: R/train_LightRF.R
================================================
# train_LightRF.R
# ::rtemis::
# 2025 EDG rtemis.org

# References
# LightGBM parameters: https://lightgbm.readthedocs.io/en/latest/Parameters.html

# %% train_.LightRFHyperparameters ----
#' Random Forest using LightGBM
#'
#' @param hyperparameters `LightRFHyperparameters` object: make using [setup_LightRF].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Validation set for early stopping.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, LightRFHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("lightgbm")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    dat_validation = dat_validation,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  type <- supervised_type(x)
  if (type == "Classification") {
    nclasses <- nlevels(outcome(x))
  } else {
    nclasses <- 1L
  }
  if (is.null(hyperparameters[["objective"]])) {
    hyperparameters@hyperparameters[["objective"]] <- if (
      type == "Regression"
    ) {
      "regression"
    } else {
      if (nclasses == 2L) {
        "binary"
      } else {
        "multiclass"
      }
    }
  }

  ## Preprocess & create lgb.Datasets ----
  lgb_data <- prepare_lgb_data(
    x = x,
    dat_validation = dat_validation,
    type = type,
    weights = weights,
    verbosity = verbosity
  )
  x <- lgb_data[["train_data"]]
  dat_validation <- lgb_data[["valid_data"]]
  prp <- lgb_data[["preprocessor"]]

  # Train ----
  params <- hyperparameters@hyperparameters
  # Remove params that are not used by LightGBM
  params[["ifw"]] <- NULL
  params[["nrounds"]] <- params[["early_stopping_rounds"]] <- NULL
  # num_class is required for multiclass classification only, must be 1 or unset for regression & binary classification
  if (nclasses > 2L) {
    params[["num_class"]] <- nclasses
  }
  # Set n threads
  params[["num_threads"]] <- prop(hyperparameters, "n_workers")

  model <- lightgbm::lgb.train(
    params = params,
    data = x,
    nrounds = hyperparameters[["nrounds"]],
    valids = if (!is.null(dat_validation)) {
      list(training = x, validation = dat_validation)
    } else {
      list(training = x)
    },
    early_stopping_rounds = hyperparameters[["early_stopping_rounds"]],
    verbose = verbosity - 2L
  )
  check_inherits(model, "lgb.Booster")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.LightRFHyperparameters


================================================
FILE: R/train_LightRuleFit.R
================================================
# train_LightRuleFit.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% train_.LightRuleFitHyperparameters ----
#' Train a LightRuleFit model
#'
#' Train a LightRuleFit model using LightGBM and GLMNET.
#'
#' @param hyperparameters `LightRuleFitHyperparameters` object: make using [setup_LightRuleFit].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation tabular data: Validation set.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, LightRuleFitHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("lightgbm", "glmnet", "matrixStats", "gsubfn")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    dat_validation = dat_validation,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  type <- supervised_type(x)
  nclasses <- if (type == "Classification") nlevels(x[[ncol(x)]]) else 1L

  # IFW for LightGBM ----
  # See setup_LightRuleFit: You can choose to use IFW for both steps with `ifw = TRUE` OR control each steps individually using `ifw_lightgbm` and `ifw_glmnet`.
  lightgbm_weights <- if (hyperparameters[["ifw_lightgbm"]]) {
    ifw(x[[ncol(x)]], verbosity = verbosity)
  } else {
    weights
  }

  # Train Gradient Boosting using LightGBM ----
  # LightRuleFit_tunable includes the names of all LightGBM hyperparameters used by LightRuleFit.
  lgbm_parameters <- update(
    setup_LightGBM(),
    get_hyperparams(hyperparameters, LightRuleFit_lightgbm_params)
  )
  lgbm_parameters@hyperparameters[["ifw"]] <- hyperparameters[["ifw_lightgbm"]]
  mod_lgbm <- train(
    x = x,
    dat_validation = dat_validation,
    weights = lightgbm_weights,
    hyperparameters = lgbm_parameters,
    # tuner_config = tuner_config, # ? add tuner_config to LightRuleFitHyperparameters
    outer_resampling_config = NULL,
    execution_config = execution_config,
    verbosity = verbosity
  )

  # Extract Rules from Boosted Trees ----
  lgbm_rules <- extract_rules(
    mod_lgbm@model,
    n_iter = NULL,
    xnames = names(x),
    factor_levels = get_factor_levels(x)
  )

  # Match cases x rules ----
  cases_by_rules <- match_cases_by_rules(x, lgbm_rules, verbosity = verbosity)

  # IFW for LASSO ----
  glmnet_weights <- if (hyperparameters[["ifw_glmnet"]]) {
    ifw(x[[ncol(x)]], verbosity = verbosity)
  } else {
    weights
  }

  # LASSO: Select Rules ----
  lasso_hyperparameters <- setup_GLMNET(
    alpha = hyperparameters[["alpha"]],
    lambda = hyperparameters[["lambda"]]
  )
  dat_rules <- data.frame(cases_by_rules, y = x[[ncol(x)]])
  colnames(dat_rules)[ncol(dat_rules)] <- colnames(x)[ncol(x)]
  mod_glmnet <- train(
    dat_rules,
    hyperparameters = lasso_hyperparameters,
    weights = glmnet_weights,
    execution_config = execution_config,
    verbosity = verbosity
  )

  # Rule coefficients ----
  rules_coefs <- data.matrix(coef(mod_glmnet@model))
  # Need special handling for multiclass support starting here
  intercept_coef <- rules_coefs[1, , drop = FALSE]
  colnames(intercept_coef) <- "Coefficient"
  rules_coefs <- data.frame(Rule = lgbm_rules, Coefficient = rules_coefs[-1, 1])
  nonzero_index <- which(abs(rules_coefs[["Coefficient"]]) > 0)
  rules_selected <- lgbm_rules[nonzero_index]
  cases_by_rules_selected <- cases_by_rules[, nonzero_index]
  Ncases_by_rules <- matrixStats::colSums2(cases_by_rules_selected)

  # Empirical risk ----
  if (type == "Classification" && nclasses == 2) {
    x <- as.data.table(x)
    empirical_risk <- vector("numeric", length(rules_selected))
    for (i in seq_along(rules_selected)) {
      match <- x[eval(parse(text = rules_selected[i])), ]
      freq <- table(match[[ncol(match)]])
      empirical_risk[i] <- freq[mod_glmnet@binclasspos] / sum(freq)
    }
  } else {
    empirical_risk <- NULL
  }

  # Format Rules ----
  # => Check format_LightRuleFit_rules' use of gsubfn::gsubfn
  rules_selected_formatted <- format_LightRuleFit_rules(
    rules_selected,
    decimal_places = 2
  )
  # appease R CMD check
  Coefficient <- NULL
  rules_selected_formatted_coefs <- data.table(
    Rule_ID = seq(rules_selected_formatted),
    Rule = rules_selected_formatted,
    N_Cases = Ncases_by_rules,
    Coefficient = rules_coefs[["Coefficient"]][nonzero_index]
  )
  if (type == "Classification" && nclasses == 2) {
    # appease R CMD check
    Empirical_Risk <- NULL
    rules_selected_formatted_coefs[, Empirical_Risk := empirical_risk]
  }
  setorder(rules_selected_formatted_coefs, -Coefficient)

  # LightRuleFit ----
  model <- LightRuleFit(
    model_lightgbm = mod_lgbm,
    model_glmnet = mod_glmnet,
    rules = lgbm_rules,
    rules_coefs = rules_coefs,
    rules_index = nonzero_index,
    rules_selected = rules_selected,
    rules_selected_formatted = rules_selected_formatted,
    rules_selected_formatted_coefs = rules_selected_formatted_coefs,
    y_levels = levels(x[[ncol(x)]]),
    xnames = names(x)[-ncol(x)],
    complexity_metrics = data.frame(
      n_rules_total = length(lgbm_rules),
      n_nonzero_rules = length(nonzero_index)
    )
  )
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.LightRuleFitHyperparameters


# %% predict_super.LightRuleFitHyperparameters ----
#' Predict from LightRuleFit LightGBM model
#'
#' @param model LightRuleFit object trained using `train_LightRuleFit`.
#' @param newdata data.frame or similar: Data to predict on.
#'
#' @keywords internal
#' @noRd
method(predict_super, LightRuleFit) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  check_inherits(newdata, "data.frame")

  rules <- model@rules
  cases_by_rules <- match_cases_by_rules(newdata, rules, verbosity = verbosity)
  datm <- data.matrix(cases_by_rules)
  if (model@model_lightgbm@type == "Classification") {
    predicted <- predict(
      model@model_glmnet@model,
      newx = datm,
      type = "response"
    )
    if (length(model@y_levels) == 2) {
      predicted[, 1]
    } else {
      predicted
    }
  } else {
    as.numeric(predict(model@model_glmnet@model, newx = datm))
  }
} # /rtemis::predict_super.LightRuleFit


# %% varimp_super.LightRuleFit ----
#' Get variable importance from LightRuleFit model
#'
#' @param model LightRuleFit object trained using `train_LightRuleFit`.
#'
#' @keywords internal
#' @noRd
method(varimp_super, LightRuleFit) <- function(model) {
  .coef <- coef(model@model_glmnet@model)[-1, , drop = FALSE]
  VariableImportance(
    data.table(
      variable = rownames(.coef),
      Coefficient = unname(.coef[, 1])
    )
  )
} # /rtemis::varimp_super.LightRuleFit


================================================
FILE: R/train_Ranger.R
================================================
# train_Ranger.R
# ::rtemis::
# 2025 EDG rtemis.org

# References
# https://imbs-hl.github.io/ranger/reference/ranger.html

# %% train_.RangerHyperparameters ----
#' Random Forest using Ranger
#'
#' @param hyperparameters `RangerHyperparameters`: Hyperparameters for Ranger.
#' @param x tabular data: Training data.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation tabular data: Validation data (currently unused).
#' @param verbosity Integer: Verbosity level.
#'
#' @return `ranger` model object.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, RangerHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("ranger")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }
  # mtry cannot be larger than number of features
  if (any(hyperparameters@hyperparameters[["mtry"]] > NCOL(features(x)))) {
    cli::cli_abort(
      "mtry cannot be greater than number of features: {ncol(features(x))}."
    )
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = TRUE,
    verbosity = verbosity
  )
  type <- supervised_type(x)

  # Train ----
  model <- ranger::ranger(
    formula = NULL,
    x = features(x),
    y = outcome(x),
    num.trees = hyperparameters@hyperparameters[["num_trees"]],
    mtry = hyperparameters@hyperparameters[["mtry"]],
    importance = hyperparameters@hyperparameters[["importance"]],
    write.forest = hyperparameters@hyperparameters[["write_forest"]],
    probability = type == "Classification",
    min.node.size = hyperparameters@hyperparameters[["min_node_size"]],
    min.bucket = hyperparameters@hyperparameters[["min_bucket"]],
    max.depth = hyperparameters@hyperparameters[["max_depth"]],
    replace = hyperparameters@hyperparameters[["replace"]],
    sample.fraction = hyperparameters@hyperparameters[["sample_fraction"]],
    case.weights = weights,
    splitrule = hyperparameters@hyperparameters[["splitrule"]],
    num.random.splits = hyperparameters@hyperparameters[["num_random_splits"]],
    alpha = hyperparameters@hyperparameters[["alpha"]],
    minprop = hyperparameters@hyperparameters[["minprop"]],
    poisson.tau = hyperparameters@hyperparameters[["poisson_tau"]],
    split.select.weights = hyperparameters@hyperparameters[[
      "split_select_weights"
    ]],
    always.split.variables = hyperparameters@hyperparameters[[
      "always_split_variables"
    ]],
    respect.unordered.factors = hyperparameters@hyperparameters[[
      "respect_unordered_factors"
    ]],
    scale.permutation.importance = hyperparameters@hyperparameters[[
      "scale_permutation_importance"
    ]],
    local.importance = hyperparameters@hyperparameters[["local_importance"]],
    regularization.factor = hyperparameters@hyperparameters[[
      "regularization_factor"
    ]],
    regularization.usedepth = hyperparameters@hyperparameters[[
      "regularization_usedepth"
    ]],
    keep.inbag = hyperparameters@hyperparameters[["keep_inbag"]],
    inbag = hyperparameters@hyperparameters[["inbag"]],
    holdout = hyperparameters@hyperparameters[["holdout"]],
    quantreg = hyperparameters@hyperparameters[["quantreg"]],
    time.interest = hyperparameters@hyperparameters[["time_interest"]],
    oob.error = hyperparameters@hyperparameters[["oob_error"]],
    num.threads = prop(hyperparameters, "n_workers"),
    save.memory = hyperparameters@hyperparameters[["save_memory"]],
    verbose = verbosity > 0L,
    node.stats = hyperparameters@hyperparameters[["node_stats"]],
    seed = hyperparameters@hyperparameters[["seed"]],
    na.action = hyperparameters@hyperparameters[["na_action"]]
  )
  check_inherits(model, "ranger")
  list(model = model, preprocessor = NULL)
} # /rtemis::train_.RangerHyperparameters

#' Predict from Ranger model
#'
#' @param model `ranger` model object.
#' @param newdata data.frame or similar: Data to predict on.
#' @param type Character: Prediction type.
#' @param verbosity Integer: Verbosity level.
#' @param ranger_type Character: Ranger prediction type.
#' @param ... Additional arguments passed to ranger predict.
#'
#' @keywords internal
#' @noRd
method(predict_super, class_ranger) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  check_inherits(model, "ranger")
  check_inherits(newdata, "data.frame")

  # Predict ----
  predicted <- predict(
    model,
    data = newdata,
    type = "response",
    verbose = verbosity > 0L
  )[["predictions"]]
  if (type == "Classification" && NCOL(predicted) == 2L) {
    # In binary classification, ranger returns matrix with 2 columns
    # with probabilities for each class
    predicted <- predicted[, 2L]
  }
  predicted
} # /rtemis::predict_super.class_ranger


# %% varimp_super.class_ranger ----
#' Get variable importance from Ranger model
#'
#' @param model `ranger` model object.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_ranger) <- function(model) {
  check_inherits(model, "ranger")
  vi <- ranger::importance(model)
  VariableImportance(
    data.table(
      variable = names(vi),
      importance = unname(vi)
    )
  )
} # /rtemis::varimp_super.class_ranger


# %% validate_hyperparameters.RangerHyperparameters ----
#' Validate Ranger Hyperparameters
#'
#' Validate Ranger Hyperparameters given training data.
#'
#' @param x tabular data: Training data.
#' @param hyperparameters `RangerHyperparameters`: Hyperparameters to check.
#'
#' @return NULL. Will throw error if hyperparameters are invalid.
#'
#' @keywords internal
#' @noRd
method(validate_hyperparameters, RangerHyperparameters) <- function(
  x,
  hyperparameters
) {
  check_is_S7(x, class_data.frame)
  check_is_S7(hyperparameters, RangerHyperparameters)

  # Check mtry
  if (any(hyperparameters@hyperparameters[["mtry"]] > NCOL(features(x)))) {
    cli::cli_abort(
      "mtry cannot be greater than number of features: {ncol(features(x))}."
    )
  }

  hyperparameters
} # /rtemis::validate_hyperparameters.RangerHyperparameters


================================================
FILE: R/train_SVM.R
================================================
# train_SVM.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% train_.LinearSVMHyperparameters ----
#' Train a Linear SVM model
#'
#' Train a Linear SVM model using `e1071::svm`.
#'
#' SVM does not work in the presence of missing values.
#'
#' @param hyperparameters `LinearSVMHyperparameters` object: make using [setup_LinearSVM].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Not used for Linear SVM.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @return Object of class `svm`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, LinearSVMHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("e1071")

  # Checks ----
  if (!is.null(weights)) {
    cli::cli_abort(
      "Case weights are not supported by e1071::svm. You can enable `ifw` in the hyperparameters to use inverse frequency weighting instead."
    )
  }

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )

  type <- supervised_type(x)
  n_classes <- if (type == "Classification") {
    nlevels(outcome(x))
  } else {
    NA
  }

  # Preprocess ----
  # One-hot encode
  y <- outcome(x)
  x <- features(x)
  factor_index <- names(x)[which(sapply(x, is.factor))]
  if (length(factor_index) > 0L) {
    prp <- preprocess(
      x,
      config = setup_Preprocessor(one_hot = TRUE),
      verbosity = verbosity
    )
    x <- preprocessed(prp)
  } else {
    prp <- NULL
  }

  # Can use class_weights or set class.weights = "inverse" in svm()
  # if (is.null(weights)) {
  #   weights <- rep(1, NROW(x))
  # }

  # Train ----
  class_weights <-
    if (
      type == "Classification" && n_classes == 2 && hyperparameters[["ifw"]]
    ) {
      "inverse"
    } else {
      NULL
    }
  # gamma can't be NULL even if not used
  gamma <- hyperparameters[["gamma"]]
  if (is.null(gamma)) {
    gamma <- 1
  }
  model <- e1071::svm(
    x = x,
    y = y, # factor or numeric
    kernel = hyperparameters[["kernel"]],
    cost = hyperparameters[["cost"]],
    gamma = gamma,
    class.weights = class_weights,
    probability = TRUE
  )
  check_inherits(model, "svm")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.LinearSVMHyperparameters


# %% train_.RadialSVMHyperparameters ----
#' Train a Radial SVM model
#'
#' Train a Radial SVM model using `e1071::svm`.
#'
#' SVM does not work in the presence of missing values.
#'
#' @param hyperparameters `RadialSVMHyperparameters` object: make using [setup_RadialSVM].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation Optional tabular data: Not used for Radial SVM.
#' @param verbosity Integer: If > 0, print messages.
#'
#' @return Object of class `svm`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, RadialSVMHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("e1071")

  # Checks ----
  if (!is.null(weights)) {
    cli::cli_abort(
      "Case weights are not supported by e1071::svm. You can enable `ifw` in the hyperparameters to use inverse frequency weighting instead."
    )
  }

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )

  type <- supervised_type(x)
  n_classes <- if (type == "Classification") {
    nlevels(outcome(x))
  } else {
    NA
  }

  # Preprocess ----
  # One-hot encode
  y <- outcome(x)
  x <- features(x)
  factor_index <- names(x)[which(sapply(x, is.factor))]
  if (length(factor_index) > 0L) {
    prp <- preprocess(
      x,
      config = setup_Preprocessor(one_hot = TRUE),
      verbosity = verbosity
    )
    x <- preprocessed(prp)
  } else {
    prp <- NULL
  }

  # Can use class_weights or set class.weights = "inverse" in svm()
  # if (is.null(weights)) {
  #   weights <- rep(1, NROW(x))
  # }

  # Train ----
  class_weights <-
    if (
      type == "Classification" && n_classes == 2 && hyperparameters[["ifw"]]
    ) {
      "inverse"
    } else {
      NULL
    }
  # gamma can't be NULL even if not used
  gamma <- hyperparameters[["gamma"]]
  if (is.null(gamma)) {
    gamma <- 1
  }
  model <- e1071::svm(
    x = x,
    y = y, # factor or numeric
    kernel = hyperparameters[["kernel"]],
    cost = hyperparameters[["cost"]],
    gamma = gamma,
    class.weights = class_weights,
    probability = TRUE
  )
  check_inherits(model, "svm")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.RadialSVMHyperparameters


# %% predict_super.svm ----
#' Predict from SVM model
#'
#' @param model SVM model.
#' @param newdata data.frame or similar: Data to predict on.
#' @param type Character: Type of supervised learning ("Classification" or "Regression").
#'
#' @keywords internal
#' @noRd
method(predict_super, class_svm) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  if (type == "Classification") {
    predicted_prob <- attr(
      predict(model, newdata = newdata, probability = TRUE),
      "probabilities"
    )
    if (length(model$levels) == 2) {
      predicted_prob[, 2]
    } else {
      predicted_prob
    }
  } else {
    predict(model, newdata = newdata)
  }
} # /rtemis::predict_super.svm


# %% varimp_super.class_svm ----
#' Get coefficients from SVM model
#'
#' @param model SVM model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_svm) <- function(model) {
  # Only for linear kernel with binary classification
  if (model[["kernel"]] == 0L && model[["nclasses"]] == 2) {
    .coefs <- coef(model)
    VariableImportance(
      data.table(
        variable = names(.coefs),
        Coefficient = unname(.coefs)
      )
    )
  } else {
    NULL
  }
} # /rtemis::varimp_super.svm


================================================
FILE: R/train_TabNet.R
================================================
# train_TabNet.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% train_.TabNetHyperparameters ----
#' Train a TabNet model
#'
#' Train a TabNet model using `TabNet`.
#'
#' TabNet does not work in the presence of missing values.
#'
#' @param hyperparameters `TabNetHyperparameters` object: make using [setup_TabNet].
#' @param x tabular data: Training set.
#' @param weights Numeric vector: Case weights.
#' @param dat_validation tabular data: Validation set for early stopping.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Object of class `TabNet`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
method(train_, TabNetHyperparameters) <- function(
  hyperparameters,
  x,
  weights = NULL,
  dat_validation = NULL,
  execution_config = setup_ExecutionConfig(),
  verbosity = 1L
) {
  # Dependencies ----
  check_dependencies("torch", "tabnet")

  # Hyperparameters ----
  # Hyperparameters must be either untunable or frozen by `train`.
  if (needs_tuning(hyperparameters)) {
    cli::cli_abort("Hyperparameters must be fixed - use train() instead.")
  }

  # Data ----
  check_supervised(
    x = x,
    allow_missing = FALSE,
    verbosity = verbosity
  )
  type <- supervised_type(x)

  # Scale data ----
  y <- outcome(x)
  prp <- preprocess(
    features(x),
    config = setup_Preprocessor(scale = TRUE, center = TRUE)
  )
  x <- prp@preprocessed

  # Train ----
  # The predictor data should be standardized (e.g. centered or scaled). The model treats
  # categorical predictors internally thus, you don't need to make any treatment.
  config <- get_tabnet_config(hyperparameters)
  config[["verbose"]] <- verbosity > 0L
  model <- tabnet::tabnet_fit(
    x = x,
    y = y,
    config = config,
    weights = weights
  )
  check_inherits(model, "tabnet_fit")
  list(model = model, preprocessor = prp)
} # /rtemis::train_.TabNetHyperparameters


# %% predict_super.class_tabnet_fit ----
#' Predict from TabNet model
#'
#' @param model TabNet model.
#' @param newdata data.frame or similar: Data to predict on.
#' @param type Character: "Regression" or "Classification".
#'
#' @keywords internal
#' @noRd
method(predict_super, class_tabnet_fit) <- function(
  model,
  newdata,
  type = NULL,
  verbosity = 0L
) {
  if (type == "Regression") {
    predict(model, new_data = newdata)[[1]]
  } else if (type == "Classification") {
    predicted <- predict(model, new_data = newdata, type = "prob")
    if (NCOL(predicted) == 2) {
      predicted[[2]]
    } else {
      predicted
    }
  }
} # /rtemis::predict_super.class_tabnet_fit


# %% varimp_super.class_tabnet_fit ----
#' Get variable importance from TabNet model
#'
#' @param model TabNet model.
#'
#' @keywords internal
#' @noRd
method(varimp_super, class_tabnet_fit) <- function(model) {
  NULL
} # /rtemis::varimp_super.class_tabnet_fit


================================================
FILE: R/tune.R
================================================
# tune.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% get_tuner_fn ----
#' Get Tuner Function
#'
#' @param type Character: Type of tuner.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
get_tuner_fn <- function(type = "GridSearch") {
  type <- match_arg(type, c("GridSearch"))
  switch(type, "GridSearch" = "tune_GridSearch")
} # /rtemis::get_tuner_fn


# %% tune ----
#' Tune Supervised Learning Model
#'
#' @param x tabular data: Training set data.
#' @param hyperparameters `Hyperparameters` object: make using each learner's `setup_*` function.
#' @param tuner_config `TunerConfig` object: created with [setup_GridSearch].
#' @param preprocessor_config Optional `PreprocessorConfig` object: created with
#' [setup_Preprocessor].
#' @param weights Numeric vector: Optional case weights.
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#' @keywords internal
#' @noRd
tune <- function(
  x,
  hyperparameters,
  tuner_config,
  preprocessor_config = NULL,
  weights = NULL,
  verbosity = 1L,
  backend = "none",
  future_plan = "multicore",
  n_workers = 1L
) {
  check_is_S7(hyperparameters, Hyperparameters)
  check_is_S7(tuner_config, TunerConfig)
  stopifnot(needs_tuning(hyperparameters))

  if (tuner_config@type == "GridSearch") {
    tune_GridSearch(
      x = x,
      hyperparameters = hyperparameters,
      tuner_config = tuner_config,
      preprocessor_config = preprocessor_config,
      weights = weights,
      verbosity = verbosity,
      backend = backend,
      future_plan = future_plan,
      n_workers = n_workers
    )
  } else {
    cli::cli_abort("Unsupported tuner type: {tuner_config@type}")
  }
} # /rtemis::tune


================================================
FILE: R/tune_GridSearch.R
================================================
# tune_GridSearch.R
# ::rtemis::
# 2025 EDG rtemis.org

# %% tune_GridSearch ----
#' \pkg{rtemis} internal: Grid Search for Hyperparameter Tuning of \pkg{rtemis} Learners
#'
#' Train models using a combination of parameter values for model selection
#'
#' @details
#' Note that weights, if defined (and not NULL), should be passed directly to `grid_search`
#' as they need to be resampled along `x` and `y`, and should not be passed along with
#' `grid_params`. `ifw` and `ifw_type` should be passed as part of `grid_params`
#' and will be passed on to the learner.
#' Includes a algorithm-specific extraction of config that are determined internally,
#' such as `lambda` for `GLMNET`, `nrounds` for `LightGBM`, etc.
#'
#' The current implementation allows running sequentially either directly using lapply + cli
#' progress, or using a sequential future plan. The former may give better debugging information.
#' The latter may be helpful to test that the future parallelization setup works correctly.
#'
#' @param x tabular data: Training set.
#' @param hyperparameters `Hyperparameters` object created with a learner's `setup_*` function.
#' @param tuner_config `TunerConfig` object created with [setup_GridSearch].
#' @param preprocessor_config Optional `PreprocessorConfig` object: Applied within each tuning
#' fold so hyperparameters are evaluated on preprocessed data.
#' @param weights Vector: Class weights.
#' @param save_mods Logical: Save models in tuning results.
#' @param n_workers Integer: Number of workers to use for parallel processing.
#' @param backend Character: Type of parallelization to use. Options are "none", "future",
#' or "mirai".
#' @param future_plan Character: Future plan to use if `backend` is "future".
#' @param verbosity Integer: Verbosity level.
#'
#' @return `GridSearch` object.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
tune_GridSearch <- function(
  x,
  hyperparameters,
  tuner_config,
  preprocessor_config = NULL,
  weights = NULL,
  save_mods = FALSE,
  n_workers = 1L,
  backend = NULL,
  future_plan = NULL,
  verbosity = 1L
) {
  check_is_S7(hyperparameters, Hyperparameters)
  check_is_S7(tuner_config, TunerConfig)
  stopifnot(needs_tuning(hyperparameters))

  # Dependencies ----
  if (backend == "future") {
    check_dependencies("futurize", "future.apply")
    if (!is.null(future_plan) && future_plan == "sequential") {
      if (n_workers > 1L) {
        cli::cli_abort(
          "Requested 'sequential' future plan, which supports {.val 1L} worker, but {.val {n_workers}} workers were requested."
        )
      }
    }
  } else if (backend == "mirai") {
    check_dependencies("mirai")
  }

  # Intro ----
  start_time <- intro(
    newline_pre = TRUE,
    caller = "tune_GridSearch",
    verbosity = verbosity - 1L
  )

  # Arguments ----
  algorithm <- hyperparameters@algorithm

  # Parallel Processing Strategy ----
  # If backend is NULL, default to "none"
  if (is.null(backend)) {
    backend <- "none"
  }

  # If backend is "future" or "mirai" with n_workers = 1, we execute
  # sequentially using the respective backend just to test that the
  # parallelization setup works.
  # If the user wants standard sequential execution, they should use/leave
  # backend = "none" (default).
  if (backend != "none" && n_workers == 1L) {
    if (verbosity > 0L) {
      msg0(
        "Using ",
        backend,
        " with 1 worker"
      )
    }
  }

  # Make Grid ----
  grid_params <- get_hyperparams_need_tuning(hyperparameters)
  n_resamples <- tuner_config[["resampler_config"]][["n"]]
  search_type <- tuner_config[["search_type"]]
  # expand_grid converts NULL to "null" for expansion to work.
  param_grid <- expand_grid(grid_params, stringsAsFactors = FALSE)
  param_grid <- cbind(param_combo_id = seq_len(NROW(param_grid)), param_grid)
  n_param_combinations <- NROW(param_grid)
  res_param_grid <- expand_grid(
    c(list(resample_id = seq_len(n_resamples)), grid_params),
    stringsAsFactors = FALSE
  )
  n_res_x_comb <- NROW(res_param_grid)
  if (search_type == "randomized") {
    index_per_resample <- sample(
      n_param_combinations,
      round(tuner_config[["randomize_p"]] * n_param_combinations)
    )
    res_param_grid <- res_param_grid[rep(index_per_resample, n_resamples), ]
  }

  # Intro pt. 2 ----
  if (verbosity > 0L) {
    msg0(
      fmt("<> ", col = col_tuner, bold = TRUE),
      "Tuning ",
      algorithm,
      " by ",
      search_type,
      " grid search with ",
      desc(tuner_config@config[["resampler_config"]]),
      "..."
    )
    msg0(
      fmt(n_param_combinations, col = col_tuner, bold = TRUE),
      ngettext(
        n_param_combinations,
        " parameter combination x ",
        " parameter combinations x "
      ),
      fmt(n_resamples, col = col_tuner, bold = TRUE),
      " resamples: ",
      fmt(n_res_x_comb, col = col_tuner, bold = TRUE),
      " models total",
      " (",
      Sys.getenv("R_PLATFORM"),
      ")."
    )
  }

  # Resamples ----
  res <- resample(
    x = x,
    config = tuner_config[["resampler_config"]],
    verbosity = verbosity
  )

  # learner1 ----
  if (backend == "future") {
    ptn <- progressr::progressor(steps = NROW(res_param_grid))
  }
  learner1 <- function(
    index,
    x,
    res,
    res_param_grid,
    hyperparameters,
    preprocessor_config,
    weights,
    verbosity,
    save_mods,
    n_res_x_comb
  ) {
    if (verbosity > 1L) {
      msg_info(
        "Running grid line #",
        fmt(index, col = col_tuner, bold = TRUE),
        "/",
        NROW(res_param_grid),
        "...",
        caller = "tune_GridSearch",
        sep = ""
      )
    }
    res1 <- res[[res_param_grid[index, "resample_id"]]]
    dat_train1 <- x[res1, ]
    weights1 <- weights[res1]
    dat_valid1 <- x[-res1, ]
    hyperparams1 <- hyperparameters
    hyperparams1 <- update(
      hyperparams1,
      as.list(res_param_grid[index, 2:NCOL(res_param_grid), drop = FALSE]),
      tuned = -9L # Hyperparameters are being tuned
    )

    mod1 <- do_call(
      "train",
      args = list(
        x = dat_train1,
        dat_validation = dat_valid1,
        algorithm = hyperparams1@algorithm,
        preprocessor_config = preprocessor_config,
        hyperparameters = hyperparams1,
        weights = weights1,
        verbosity = verbosity - 1L
      )
    )

    out1 <- list(
      id = index,
      resample_id = res_param_grid[index, "resample_id"],
      metrics_training = mod1@metrics_training,
      metrics_validation = mod1@metrics_validation,
      type = mod1@type,
      hyperparameters = hyperparams1
    )

    # Algorithm-specific params ----
    # => add to hyperparameters
    if (algorithm == "GLMNET") {
      out1[["hyperparameters"]]@hyperparameters[["lambda.min"]] <- mod1@model[[
        "lambda.min"
      ]]
      out1[["hyperparameters"]]@hyperparameters[["lambda.1se"]] <- mod1@model[[
        "lambda.1se"
      ]]
    }
    if (algorithm == "LightGBM") {
      # Check best_iter is meaningful, otherwise issue message and set to 100L
      best_iter <- mod1@model[["best_iter"]]
      if (is.null(best_iter) || best_iter == -1 || best_iter == 0) {
        msg_info(
          paste(
            "best_iter returned from lightgbm:",
            best_iter,
            "- setting to 100L"
          )
        )
        best_iter <- 100L
      }
      out1[["hyperparameters"]]@hyperparameters[["best_iter"]] <- best_iter
    }
    # if (algorithm %in% c("LINAD", "LINOA")) {
    #   out1$est.n.leaves <- mod1$mod$n.leaves
    # }
    # if (algorithm == "LIHADBoost") {
    #   out1$sel.n.steps <- mod1$mod$selected.n.steps
    # }
    if (save_mods) {
      out1[["mod1"]] <- mod1
    }
    if (backend == "future") {
      ptn(sprintf("Tuning resample %i/%i", index, n_res_x_comb))
    }
    out1
  } # /learner1

  # Train Grid ----
  if (backend == "none") {
    if (verbosity > 0L) {
      msg("Tuning in sequence")
    }
    # Sequential execution with cli progress.
    grid_run <- lapply(
      cli::cli_progress_along(
        seq_len(n_res_x_comb),
        name = paste0("Tuning... (", n_res_x_comb, " combinations)"),
        type = "tasks"
      ),
      FUN = learner1,
      x = x,
      res = res,
      hyperparameters = hyperparameters,
      res_param_grid = res_param_grid,
      preprocessor_config = preprocessor_config,
      weights = weights,
      verbosity = verbosity,
      save_mods = save_mods,
      n_res_x_comb = n_res_x_comb
    )
  } else if (backend == "future") {
    # Future parallelization
    future_plan <- set_preferred_plan(
      requested_plan = future_plan,
      n_workers = n_workers,
      envir = parent.frame(),
      verbosity = verbosity
    )
    if (verbosity > 0L) {
      msg0(
        "Tuning using future (",
        bold(future_plan),
        "); N workers: ",
        bold(n_workers)
      )
    }
    if (verbosity > 1L) {
      # verify plan set by set_preferred_plan with envir
      msg_info("Current future plan:")
      print(future::plan())
    }
    grid_run <- lapply(
      X = seq_len(n_res_x_comb),
      FUN = learner1,
      x = x,
      res = res,
      hyperparameters = hyperparameters,
      res_param_grid = res_param_grid,
      preprocessor_config = preprocessor_config,
      weights = weights,
      verbosity = verbosity,
      save_mods = save_mods,
      n_res_x_comb = n_res_x_comb
    ) |>
      futurize::futurize(seed = TRUE, globals = FALSE)
  } else if (backend == "mirai") {
    if (verbosity > 0L) {
      msg("Tuning using mirai; N workers:", bold(n_workers))
    }
    mirai::daemons(n_workers, dispatcher = TRUE)
    on.exit(mirai::daemons(0L))
    grid_run <- mirai::mirai_map(
      .x = seq_len(n_res_x_comb),
      .f = learner1,
      .args = list(
        x = x,
        res = res,
        hyperparameters = hyperparameters,
        res_param_grid = res_param_grid,
        preprocessor_config = preprocessor_config,
        weights = weights,
        verbosity = verbosity,
        save_mods = save_mods,
        n_res_x_comb = n_res_x_comb
      )
    )
  }

  # Metric ----
  type <- supervised_type(x)
  metric <- tuner_config@config[["metric"]]
  maximize <- tuner_config@config[["maximize"]]
  if (is.null(metric)) {
    if (type == "Classification") {
      metric <- "Balanced_Accuracy"
    } else if (type == "Regression") {
      metric <- "MSE"
    } else {
      metric <- "Concordance"
    }
    tuner_config@config[["metric"]] <- metric
  }
  if (is.null(maximize)) {
    maximize <- metric %in%
      c("Accuracy", "Balanced_Accuracy", "Concordance", "Rsq", "r")
    tuner_config@config[["maximize"]] <- maximize
  }
  select_fn <- if (maximize) which.max else which.min
  verb <- if (maximize) "maximize" else "minimize"

  # Aggregate ----
  # Average test errors
  # if using mirai, wait for all to finish
  if (backend == "mirai") {
    # Appease R CMD check
    .progress <- NULL
    grid_run <- grid_run[.progress]
    # grid_run <- mirai::collect_mirai(grid_run)
  }
  if (type %in% c("Regression", "Survival")) {
    metrics_training_all <- as.data.table(t(sapply(
      grid_run,
      function(r) unlist(r[["metrics_training"]]@metrics)
    )))
    metrics_validation_all <- as.data.table(t(sapply(
      grid_run,
      function(r) unlist(r[["metrics_validation"]]@metrics)
    )))
  } else if (type == "Classification") {
    metrics_training_all <- as.data.table(t(sapply(
      grid_run,
      function(r) unlist(r[["metrics_training"]]@metrics[["Overall"]])
    )))
    metrics_validation_all <- as.data.table(t(sapply(
      grid_run,
      function(r) unlist(r[["metrics_validation"]]@metrics[["Overall"]])
    )))
  }
  # appease R CMD check
  param_combo_id <- NULL
  metrics_validation_all[,
    param_combo_id := rep(
      seq_len(n_param_combinations),
      each = n_resamples
    )
  ]
  metrics_training_all[,
    param_combo_id := rep(
      seq_len(n_param_combinations),
      each = n_resamples
    )
  ]
  metrics_training_by_combo_id <- metrics_training_all[,
    lapply(
      .SD,
      get(tuner_config[["metrics_aggregate_fn"]])
    ),
    by = param_combo_id
  ]
  metrics_validation_by_combo_id <- metrics_validation_all[,
    lapply(
      .SD,
      get(tuner_config[["metrics_aggregate_fn"]])
    ),
    by = param_combo_id
  ]

  tune_results <- list(
    param_grid = param_grid,
    metrics_training = metrics_training_by_combo_id,
    metrics_validation = metrics_validation_by_combo_id
  )

  # Algorithm-specific collection ----
  # N of iterations is the one hyperparameter that may be determined
  # automatically, we therefore need to extract it and average it
  ## GLMNET ----
  if (algorithm == "GLMNET") {
    if (is.null(grid_params[["lambda"]])) {
      # if lambda was NULL, cv.glmnet was run and optimal lambda was estimated
      # For each i in grid_run, get grid_run[[i]]$hyperparameters[[grid_run[[i]]$hyperparameters$which_lambda_cv]]
      if (verbosity > 1L) {
        msg_info("Extracting best lambda from GLMNET models...")
      }
      lambda_cv2 <- data.table(
        lambda = sapply(
          grid_run,
          function(x) {
            x[["hyperparameters"]][[x[["hyperparameters"]][[
              "which_lambda_cv"
            ]]]]
          }
        )
      )
      lambda_cv2[,
        param_combo_id := rep(
          seq_len(n_param_combinations),
          each = n_resamples
        )
      ]
      lambda_by_param_combo_id <- lambda_cv2[,
        lapply(.SD, get(tuner_config[["metrics_aggregate_fn"]])),
        by = param_combo_id
      ]
      # Replace NULL lambda in tune_results$param_grid with average value of CV-squared lambda
      stopifnot(tune_results[["param_grid"]][["lambda"]] == "null")
      param_grid[["lambda"]] <- tune_results[["param_grid"]][[
        "lambda"
      ]] <- lambda_by_param_combo_id[["lambda"]]
    }
  } # /GLMNET

  ## LightGBM ----
  if (algorithm == "LightGBM") {
    if (is.null(grid_params[["nrounds"]])) {
      if (verbosity > 1L) {
        msg_info("Extracting best N of iterations from LightGBM models...")
      }
      nrounds_cv <- data.table(
        nrounds = sapply(grid_run, \(x) x[["hyperparameters"]][["best_iter"]])
      )
      nrounds_cv[["param_combo_id"]] <- rep(
        seq_len(n_param_combinations),
        each = n_resamples
      )
      nrounds_by_param_combo_id <- nrounds_cv[,
        lapply(.SD, get(tuner_config[["metrics_aggregate_fn"]])),
        by = param_combo_id
      ]
      # Replace NULL nrounds in tune_results$param_grid with average value of Res nrounds
      stopifnot(tune_results[["param_grid"]][["nrounds"]] == "null")
      param_grid[["nrounds"]] <- tune_results[["param_grid"]][["nrounds"]] <-
        as.integer(round(nrounds_by_param_combo_id[["nrounds"]]))
    }
  } # /LightGBM

  ## GBM, H2OGBM ----
  # if (algorithm %in% c("H2OGBM", "GBM", "GBM3")) {
  #   est.n.trees.all <- data.frame(n.trees = plyr::laply(
  #     grid_run,
  #     function(x) x$est.n.trees
  #   ))
  #   est.n.trees.all$param_combo_id <- rep(seq_len(n_param_combinations), each = n_resamples)
  #   est.n.trees.by.param_combo_id <- aggregate(
  #     n.trees ~ param_combo_id, est.n.trees.all,
  #     metrics_aggregate_fn
  #   )
  #   tune_results <- cbind(
  #     n.trees = round(est.n.trees.by.param_combo_id$n.trees),
  #     tune_results
  #   )
  #   n_params <- n_params + 1
  # } # /GBM, H2OGBM

  ## XGBoost ----
  # if (algorithm == "XGBoost") {
  #   if (verbosity > 1L) {
  #     msg(highlight("Extracting best N of iterations from XGBoost models..."))
  #   }
  #   est.nrounds.all <- data.frame(nrounds = plyr::laply(
  #     grid_run,
  #     \(m) m$best_iteration
  #   ))
  #   est.nrounds.all$param_combo_id <- rep(seq_len(n_param_combinations),
  #     each = n_resamples
  #   )
  #   est.nrounds.by.param_combo_id <- aggregate(
  #     nrounds ~ param_combo_id, est.nrounds.all,
  #     metrics_aggregate_fn
  #   )
  #   tune_results <- cbind(
  #     nrounds = round(est.nrounds.by.param_combo_id$nrounds),
  #     tune_results
  #   )
  #   n_params <- n_params + 1
  # } /XGBoost

  ## LINAD ----
  # if (algorithm %in% c("LINAD", "LINOA")) {
  #   if (verbosity > 1L) {
  #     msg_info("Extracting best N leaves from LINAD models...")
  #   }
  #   est.n.leaves.all <- data.frame(n.leaves = plyr::laply(
  #     grid_run,
  #     \(x) ifelse(length(x$est.n.leaves) == 0, 1, x$est.n.leaves)
  #   ))
  #   est.n.leaves.all$param_combo_id <- rep(seq_len(n_param_combinations),
  #     each = n_resamples
  #   )
  #   est.n.leaves.by.param_combo_id <- aggregate(
  #     n.leaves ~ param_combo_id, est.n.leaves.all,
  #     metrics_aggregate_fn
  #   )
  #   tune_results <- cbind(
  #     n.leaves =
  #       round(est.n.leaves.by.param_combo_id$n.leaves), tune_results
  #   )
  #   n_params <- n_params + 1
  # } # /LINAD, LINOA

  ## LIHADBoost ----
  # if (algorithm == "LIHADBoost") {
  #   if (verbosity > 1L) {
  #     msg(highlight("Extracting best N steps from LIHADBoost models..."))
  #   }
  #   est.n.steps.all <- data.frame(n.steps = plyr::laply(
  #     grid_run,
  #     \(x) x$sel.n.steps
  #   ))
  #   est.n.steps.all$param_combo_id <- rep(seq_len(n_param_combinations),
  #     each = n_resamples
  #   )
  #   est.n.steps.by.param_combo_id <- aggregate(
  #     n.steps ~ param_combo_id, est.n.steps.all,
  #     metrics_aggregate_fn
  #   )
  #   tune_results <- cbind(
  #     n.steps = round(est.n.steps.by.param_combo_id$n.steps),
  #     tune_results
  #   )
  #   n_params <- n_params + 1
  # } # /LIHADBoost

  # Consider explicitly sorting hyperparam values in increasing order,
  # so that in case of tie, lowest value is chosen -
  # if that makes sense, e.g. n.leaves, etc.
  best_param_combo_id <- as.integer(
    tune_results[["metrics_validation"]][
      select_fn(tune_results[["metrics_validation"]][[metric]]),
      1
    ]
  )
  best_param_combo <- as.list(param_grid[best_param_combo_id, -1, drop = FALSE])
  if (verbosity > 0L) {
    msg(
      paste0("Best config to ", paste(verb, metric), ":")
    )
    print_tune_finding(grid_params, best_param_combo)
  }

  # Outro ----
  # Since this is always called from within `train()`, we don't want to print "Completed..."
  outro(start_time, verbosity = verbosity - 1L)

  if (verbosity > 0L) {
    msg(
      fmt("</>", col = col_tuner, bold = TRUE),
      "Tuning done."
    )
  }

  # => add optional mods field to GridSearch
  # if (save_mods) mods <- grid_run
  GridSearch(
    hyperparameters = hyperparameters,
    tuner_config = tuner_config,
    tuning_results = list(
      param_grid = param_grid,
      training = metrics_training_by_combo_id,
      validation = metrics_validation_by_combo_id
    ),
    best_hyperparameters = best_param_combo
  )
} # /rtemis::tune_GridSearch


# %% print_tune_finding ----
#' Print tuning results
#'
#' Prints set of search values and best value in the form {1, 3, 5} => 3
#' for each hyperparameter that was tuned.
#'
#' @author EDG
#' @keywords internal
#' @noRd
print_tune_finding <- function(grid_params, best_param_combo, pad = 22L) {
  # Make list of search values and best value
  tfl <- lapply(seq_along(grid_params), function(i) {
    paste0(
      "{",
      paste(grid_params[[i]], collapse = ", "),
      "}",
      " => ",
      bold(best_param_combo[[names(grid_params)[i]]])
    )
  })
  names(tfl) <- names(grid_params)
  # Capture output to sync with msg stream (stderr)
  out <- utils::capture.output(printls(tfl, print_class = FALSE, pad = pad))
  message(paste(out, collapse = "\n"))
} # /rtemis::print_tune_finding


================================================
FILE: R/utils.R
================================================
# utils.R
# ::rtemis::
# 2016- EDG rtemis.org

#' Print range of continuous variable
#'
#' @param x Numeric vector
#' @param ddSci Logical: If TRUE, use [ddSci] or range.
#' @param decimal_places Integer: Number of decimal place to use if `ddSci = TRUE`.
#' @param na.rm Logical: passed to `base::range`
#'
#' @return Called for its side effect of printing the range of `x`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
show_range <- function(x, ddSci = TRUE, decimal_places = 1, na.rm = TRUE) {
  if (ddSci) {
    paste(
      ddSci(range(x, na.rm = na.rm), decimal_places = decimal_places),
      collapse = " to "
    )
  } else {
    paste(range(x, na.rm = na.rm), collapse = " to ")
  }
} # /rtemis::show_range


#' Set Dynamic Range
#'
#' `rtemis preproc`: Adjusts the dynamic range of a vector or matrix input.
#'   By default normalizes to 0-1 range.
#'
#' @param x Numeric vector or matrix / data frame: Input
#' @param lo Target range minimum. Defaults to 0
#' @param hi Target range maximum. Defaults to 1
#' @param byCol Logical: If TRUE: if `x` is matrix, `drange` each
#' column separately
#'
#' @return Numeric vector.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' x <- runif(20, -10, 10)
#' x <- drange(x)
drange <- function(x, lo = 0, hi = 1, byCol = TRUE) {
  dr <- function(x, lo, hi) {
    .min <- min(x, na.rm = TRUE)
    (x - .min) / max(x - .min, na.rm = TRUE) * (hi - lo) + lo
  }

  if (NCOL(x) > 1) {
    if (byCol) {
      apply(x, 2, function(x) dr(x, lo, hi))
    } else {
      dr(x, lo, hi)
    }
  } else {
    dr(x, lo, hi)
  }
} # /rtemis::drange


#' Factor NA to "missing" level
#'
#' Set NA values of a factor vector to a new level indicating missingness
#'
#' @param x Factor.
#' @param na_level_name Character: Name of new level to create that will be assigned to all current
#' NA values in `x`.
#'
#' @return factor.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' x <- factor(sample(letters[1:3], 100, TRUE))
#' x[sample(1:100, 10)] <- NA
#' xm <- factor_NA2missing(x)
factor_NA2missing <- function(x, na_level_name = "missing") {
  check_inherits(x, "factor")
  if (anyNA(x)) {
    x <- factor(x, levels = c(levels(x), na_level_name))
    x[is.na(x)] <- na_level_name
    x
  } else {
    x
  }
} # /rtemis::factor_NA2missing


#' Filter order
#'
#' @param x Input vector
#' @param idl Logical vector: Index of elements to filter
#' @param decreasing Logical: If TRUE, sort in descending order
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' x <- rnorm(10)
#' x
#' x[filter_order(x, x < 0)]
filter_order <- function(x, idl, decreasing = FALSE) {
  idi <- which(idl)
  flt_ord <- order(x[idi], decreasing = decreasing)
  idi[flt_ord]
}


#' Get the mode of a factor or integer
#'
#' Returns the mode of a factor or integer
#'
#' @param x Vector, factor or integer: Input data.
#' @param na.rm Logical: If TRUE, exclude NAs (using `na.exclude(x)`).
#' @param getlast Logical: If TRUE, get the last value in case of ties.
#' @param retain_class Logical: If TRUE, output is always same class as input.
#'
#' @return The mode of `x`
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- c(9, 3, 4, 4, 0, 2, 2, NA)
#' get_mode(x)
#' x <- c(9, 3, 2, 2, 0, 4, 4, NA)
#' get_mode(x)
#' get_mode(x, getlast = FALSE)
get_mode <- function(
  x,
  na.rm = TRUE,
  getlast = TRUE,
  retain_class = TRUE
) {
  if (retain_class) {
    .class <- class(x)
  }
  if (na.rm) {
    x <- na.exclude(x)
  }
  freq <- table(x)
  if (sum(freq) > 0) {
    if (getlast) {
      .vals <- unique(x)
      out <- .vals[rev(which(.vals %in% names(freq)[which(freq == max(freq))]))[
        1
      ]]
    } else {
      out <- names(freq)[which.max(freq)]
    }
    if (length(out) == 0) out <- NA
  } else {
    out <- NA
  }

  if (retain_class) {
    if (is.factor(x)) {
      out <- factor(out, levels = levels(x))
    } else {
      class(out) <- .class
    }
  }
  out
} # /rtemis::get_mode


#' Check if vector is constant
#'
#' @param x Vector: Input
#' @param skip_missing Logical: If TRUE, skip NA values before test
#'
#' @return Logical.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- rep(9, 1000000)
#' is_constant(x)
#' x[10] <- NA
#' is_constant(x)
#' is_constant(x, skip_missing = TRUE)
is_constant <- function(x, skip_missing = FALSE) {
  # all(duplicated(x)[-1L])
  if (skip_missing) {
    x <- na.exclude(x)
  }
  isTRUE(all(x == x[1]))
} # /rtemis::is_constant


#' Check if variable is discrete (factor or integer)
#'
#' @param x Input
#'
#' @return Logical.
#'
#' @author EDG
#' @keywords internal
#' @noRd
is_discrete <- function(x) {
  is.factor(x) || is.integer(x) || is.logical(x) || is.character(x)
} # /rtemis::is_discrete


#' Return object if it has length > 0
#'
#' Returns the input object if it has length > 0, else NULL
#'
#' @param x Object
#'
#' @return `x` if `length(x) > 0`, else `NULL`
#'
#' @keywords internal
#' @noRd
iflengthy <- function(x) {
  if (length(x) > 0) x else NULL
} # /rtemis::iflengthy


#' @keywords internal
#' @noRd
pval_stars <- function(x) {
  cut(x, breaks = c(0, .001, .01, .05, 1), labels = c("***", "**", "*", ""))
}


#' Format singular/plural noun
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#' singorplu(0, "cat")
#' singorplu(1, "cat")
#' singorplu(2, "cat")
singorplu <- function(n, x) {
  switch(
    as.character(n),
    `0` = paste0("no ", x, "s"),
    `1` = paste("1", x),
    paste0(n, " ", x, "s")
  )
}


#' Size of object
#'
#' Returns the size of an object
#'
#' @details
#' If `dim(x)` is NULL, returns `length(x)`.
#' @param x any object with `length()` or `dim()`.
#' @param verbosity Integer: Verbosity level. If > 0, print size to console
#'
#' @return Integer vector with length equal to the number of dimensions of `x`, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- rnorm(20)
#' size(x)
#' # 20
#' x <- matrix(rnorm(100), 20, 5)
#' size(x)
#' # 20  5
size <- function(x, verbosity = 1L) {
  z <- if (is.null(dim(x))) {
    length(x)
  } else {
    dim(x)
  }
  if (verbosity > 0L) {
    # Format to add "," for thousands
    z_formatted <- format(z, trim = TRUE, big.mark = ",", scientific = FALSE)
    cat(paste(bold(z_formatted), collapse = gray(" x ")), "\n")
  }
  invisible(z)
} # /rtemis::size


#' Recycle values of vector to match length of target.
#'
#' @details
#' If `target` is longer than `x`, the values of `x` will be recycled to match the length of
#' `target`. If `x` is longer than `target`, the values of `x` will be truncated to match the
#' length of `target`.
#' Used internally by many functions.
#'
#' @param x Vector to be recycled
#' @param target Object whose length defines target length
#'
#' @return Vector.
#'
#' @author EDG
#' @keywords internal
#' @noRd
recycle <- function(x, target) {
  lenx <- length(x)
  lent <- length(target)
  rep(x, ceiling(lent / lenx))[seq_len(lent)]
} # /rtemis::recycle


#' Random Normal Matrix
#'
#' Create a matrix or data frame of defined dimensions, whose columns are random normal vectors
#'
#' @param nrow Integer: Number of rows.
#' @param ncol Integer: Number of columns.
#' @param mean Float: Mean.
#' @param sd Float: Standard deviation.
#' @param return_df Logical: If TRUE, return data.frame, otherwise matrix.
#' @param seed Integer: Set seed for `rnorm`.
#'
#' @return `matrix` or `data.frame`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- rnormmat(20, 5, mean = 12, sd = 6, return_df = TRUE, seed = 2026)
#' x
rnormmat <- function(
  nrow = 10,
  ncol = 10,
  mean = 0,
  sd = 1,
  return_df = FALSE,
  seed = NULL
) {
  if (length(mean) != ncol) {
    mean <- rep_len(mean, ncol)
  }
  if (length(sd) != ncol) {
    sd <- rep_len(sd, ncol)
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  mat <- sapply(seq_len(ncol), function(j) {
    rnorm(nrow, mean = mean[j], sd = sd[j])
  })
  if (return_df) {
    mat <- as.data.frame(mat)
  }
  mat
} # /rtemis::rnormmat


#' Random Uniform Matrix
#'
#' Create a matrix or data frame of defined dimensions, whose columns are random uniform vectors
#'
#' @param nrow Integer: Number of rows.
#' @param ncol Integer: Number of columns.
#' @param min Float: Min.
#' @param max Float: Max.
#' @param return_df Logical: If TRUE, return data.frame, otherwise matrix.
#' @param seed Integer: Set seed for `rnorm`.
#'
#' @return `matrix` or `data.frame`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- runifmat(20, 5, min = 12, max = 18, return_df = TRUE, seed = 2026)
#' x
runifmat <- function(
  nrow = 10,
  ncol = 10,
  min = 0,
  max = 1,
  return_df = FALSE,
  seed = NULL
) {
  if (length(min) < ncol) {
    min <- rep(min, ncol / length(min))
  }
  if (length(max) < ncol) {
    max <- rep(max, ncol / length(max))
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }
  mat <- sapply(seq_len(ncol), function(j) runif(nrow, min = min, max = max))
  if (return_df) {
    mat <- as.data.frame(mat)
  }
  mat
} # /rtemis::runifmat


#' Get rtemis version and system info
#'
#' @return List: rtemis version and system info, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' rtversion()
rtversion <- function() {
  out <- c(
    list(rtemis_version = as.character(packageVersion("rtemis"))),
    as.list(Sys.info())
  )
  printls(out, print_class = FALSE)
  invisible(out)
} # /rtemis::rtversion


#' Symmetric Set Difference
#'
#' @param x vector
#' @param y vector of same type as `x`
#'
#' @return Vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' setdiff(1:10, 1:5)
#' setdiff(1:5, 1:10)
#' setdiffsym(1:10, 1:5)
#' setdiffsym(1:5, 1:10)
setdiffsym <- function(x, y) {
  union(setdiff(x, y), setdiff(y, x))
} # /rtemis::setdiffsym


#' Initialize Project Directory
#'
#' Initializes Directory Structure: "R", "Data", "Results"
#'
#' @param path Character: Path to initialize project directory in.
#' @param output_dir Character: Name of output directory to create.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character: the path where the project directory was initialized, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' \dontrun{
#' # Will create "my_project" directory with
#' init_project_dir("my_project")
#' }
init_project_dir <- function(path, output_dir = "Out", verbosity = 1L) {
  if (verbosity > 0L) {
    msg("Initializing project directory...")
  }
  path <- normalizePath(path, mustWork = FALSE)

  # Create project directory if it doesn't exist
  if (!dir.exists(path)) {
    if (verbosity > 0L) {
      cat("  > Creating ", bold(path), " folder...", sep = "")
    }
    dir.create(path, recursive = TRUE)
    if (dir.exists(path)) {
      if (verbosity > 0L) {
        yay()
      }
    } else {
      if (verbosity > 0L) {
        nay()
      }
      cli::cli_abort(
        "Failed to create project directory at {.file {path}}. Check path & permissions."
      )
    }
  }

  # Log file: rtemis_init.log ----
  logfile_path <- file.path(path, "rtemis_init.log")
  sink(file = logfile_path, append = TRUE)
  cat("<rtemis Project>\n")
  cat("Initialized: ", datetime(), "\n", sep = "")
  cat("--------------------------------\n")
  print(sessionInfo())
  sink()

  # Directories: /R /Data /output_dir ----
  dirs <- file.path(path, c("R", "Data", output_dir))
  for (i in dirs) {
    if (verbosity > 0L) {
      cat("  > Creating ", bold(i), " folder...", sep = "")
    }
    if (!dir.exists(i)) {
      dir.create(i)
      if (dir.exists(i)) {
        if (verbosity > 0L) yay()
      } else {
        if (verbosity > 0L) nay()
      }
    } else {
      if (verbosity > 0L) cat(orange(" Already present\n", bold = TRUE))
    }
  }
  if (verbosity > 0L) {
    msg("Done.")
  }
  invisible(path)
} # /rtemis::init_project_dir


================================================
FILE: R/utils_art.R
================================================
# utils_art.R
# ::rtemis::
# 2025- EDG rtemis.org

#' Color columns of text art
#'
#' This function accepts text input of 1 or more lines and two colors.
#' It will:
#' a) generate a color gradient between the two colors
#' b) apply the gradient to each column of the text, creating a left to right color gradient.
#'
#' @param x Character vector of text to colorize.
#' @param color_left Color for the left side of the gradient.
#' @param color_right Color for the right side of the gradient.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character vector with color formatting applied to each column.
#'
#' @author EDG
#' @keywords internal
#' @noRd
color_txt_columns <- function(
  x,
  color_left,
  color_right,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)
  # Count number of columns in input text
  ncols <- max(nchar(x, type = "width"))

  if (ncols == 0) {
    return(x)
  }

  # Create color gradient from color_left to color_right with ncols steps
  gradient <- grDevices::colorRampPalette(c(color_left, color_right))(ncols)

  # Apply the colors to each column of the text
  result <- character(length(x))

  for (i in seq_along(x)) {
    line <- x[i]
    line_chars <- strsplit(line, "")[[1]]
    line_width <- nchar(line, type = "width")

    if (line_width == 0) {
      result[i] <- line
      next
    }

    colored_chars <- character(length(line_chars))

    for (j in seq_along(line_chars)) {
      char <- line_chars[j]
      if (char == " ") {
        colored_chars[j] <- char
      } else {
        # Use column position for gradient color
        col_pos <- min(j, ncols)
        colored_chars[j] <- fmt(
          char,
          col = gradient[col_pos],
          output_type = output_type
        )
      }
    }

    result[i] <- paste0(colored_chars, collapse = "")
  }

  result
} # /rtemis::color_txt_columns


#' Color rows of text art
#'
#' This function accepts text input of 1 or more lines and two colors.
#' It will:
#' a) generate a color gradient between the two colors
#' b) apply the gradient to each row of the text, creating a top to bottom color gradient.
#'
#' @param x Character vector of text to colorize.
#' @param color_top Color for the top of the gradient.
#' @param color_bottom Color for the bottom of the gradient.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character vector with color formatting applied to each row.
#'
#' @author EDG
#' @keywords internal
#' @noRd
color_txt_rows <- function(
  x,
  color_top,
  color_bottom,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  # Number of rows
  nrows <- length(x)

  if (nrows == 0) {
    return(x)
  }

  # Create color gradient from color_top to color_bottom with nrows steps
  gradient <- grDevices::colorRampPalette(c(color_top, color_bottom))(nrows)

  # Apply the colors to each row of the text
  result <- character(nrows)

  for (i in seq_along(x)) {
    line <- x[i]
    line_chars <- strsplit(line, "")[[1]]
    line_width <- nchar(line, type = "width")

    if (line_width == 0) {
      result[i] <- line
      next
    }

    colored_chars <- character(length(line_chars))

    for (j in seq_along(line_chars)) {
      char <- line_chars[j]
      if (char == " ") {
        colored_chars[j] <- char
      } else {
        colored_chars[j] <- fmt(
          char,
          col = gradient[i],
          output_type = output_type
        )
      }
    }

    result[i] <- paste0(colored_chars, collapse = "")
  }

  result
} # /rtemis::color_txt_rows


#' pkglogo
#'
#' @param pkg Character: Package name.
#' @param filename Character: Filename of the logo file (without path).
#' @param fmt_fn Function: "color_txt_columns", "color_text_rows" Formatting function to apply to
#' the logo text.
#' @param args List: Arguments to pass to `fmt_fn`.
#' @param pad Integer: Left-pad output with this many spaces.
#'
#' @return Character: Formatted logo text.
#'
#' @author EDG
#' @keywords internal
#' @noRd
pkglogo <- function(
  pkg = .packageName,
  filename = paste0(pkg, ".utf8"),
  fmt_fn = color_txt_columns,
  args = list(
    color_left = kaimana_red,
    color_right = coastside_orange,
    output_type = "ansi"
  ),
  pad = 2L
) {
  logo_file <- system.file(
    package = .packageName,
    "resources",
    filename
  )
  logo_txt <- readLines(logo_file)
  paste0(
    strrep(" ", pad),
    do.call(fmt_fn, c(list(x = logo_txt), args)),
    collapse = "\n"
  )
} # /rtemis::pkglogo


#' Show colors
#'
#' Display color previews with ANSI color blocks
#'
#' @param x Named vector or list of colors to preview.
#' @param pad Integer: Pad output with this many spaces.
#' @param center_title Logical: If TRUE, autopad title for centering, if present.
#' @param title Character: Optional title to display.
#' @param title_newline Logical: If TRUE, add newline after title.
#' @param limit Integer: Maximum number of colors to show. Set to -1L for no limit.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted string that can be printed with cat()
#'
#' @author EDG
#' @keywords internal
#' @noRd
show_col <- function(
  x,
  pad = 2L,
  center_title = TRUE,
  title = NULL,
  title_newline = TRUE,
  limit = 12L,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  # Helper function to build padded string equivalent of padcat
  build_padcat <- function(
    text,
    pad = 2L,
    newline_pre = FALSE,
    newline = FALSE
  ) {
    result <- ""
    if (newline_pre) {
      result <- paste0(result, "\n")
    }
    result <- paste0(result, strrep(" ", pad))
    result <- paste0(result, text)
    if (newline) {
      result <- paste0(result, "\n")
    }
    result
  }

  # Initialize output string
  result <- ""

  if (is.null(x)) {
    if (!is.null(title)) {
      result <- paste0(
        result,
        highlight(build_padcat(title, pad = pad, newline = title_newline))
      )
    }
    result <- paste0(result, strrep(" ", pad), "NULL")
    return(result)
  }

  if (length(x) == 0) {
    result <- paste0(result, class(x), " of length 0.\n")
    return(result)
  }

  # Convert to named list if needed
  if (is.null(names(x)) && !is.list(x)) {
    names(x) <- as.character(x)
  }

  x <- as.list(x)
  xnames <- names(x)
  if (is.null(xnames)) {
    xnames <- paste0("color_", seq_along(x))
  }

  # Calculate left-hand side width
  lhs <- max(nchar(xnames)) + pad

  # Add title if provided
  if (!is.null(title)) {
    title_pad <- if (center_title) {
      max(0, lhs - round((.5 * nchar(title))) - 3)
    } else {
      0
    }
    result <- paste0(
      result,
      highlight(build_padcat(title, pad = title_pad, newline = title_newline)),
      "\n"
    )
  }

  # Show limit message if needed
  counter <- 0L
  if (limit != -1L && length(x) > limit) {
    limit_text <- paste0(
      italic(
        gray(
          paste0(
            "Showing first ",
            limit,
            " of ",
            length(x),
            " colors.\n"
          ),
          output_type = output_type
        ),
        output_type = output_type
      )
    )
    result <- paste0(result, build_padcat(limit_text, pad = pad))
  }

  # Display each color
  for (i in seq_along(x)) {
    counter <- counter + 1L
    if (limit != -1L && counter > limit) {
      more_text <- paste0(
        italic(
          gray(
            paste0(
              "...",
              length(x) - limit,
              " more colors not shown.\n"
            )
          ),
          output_type = output_type
        )
      )
      result <- paste0(result, build_padcat(more_text, pad = pad))
      break
    }

    # Get color value
    color_val <- x[[i]]

    # Create color blocks: 2 solid, 2 medium, 2 light
    if (output_type == "ansi") {
      # Use the color directly
      color_display <- tryCatch(
        {
          # Create blocks with varying intensities
          solid_block <- fmt(
            "\u2588",
            col = color_val,
            output_type = output_type
          )
          medium_block <- fmt(
            "\u2593",
            col = color_val,
            output_type = output_type
          )
          light_block <- fmt(
            "\u2591",
            col = color_val,
            output_type = output_type
          )

          paste0(
            solid_block,
            solid_block,
            medium_block,
            medium_block,
            light_block,
            light_block
          )
        },
        error = function(e) {
          # Fallback if color conversion fails
          paste0(
            "\u2588\u2588\u2593\u2593\u2591\u2591 (",
            color_val,
            ")"
          )
        }
      )
    } else {
      # For non-ANSI output, just show the color value
      color_display <- paste0(
        "\u2588\u2588\u2593\u2593\u2591\u2591 (",
        color_val,
        ")"
      )
    }

    # Format and add the line
    item_text <- paste0(
      bold(
        format(
          xnames[i],
          width = lhs,
          justify = "right"
        ),
        output_type = output_type
      ),
      ": ",
      color_display,
      "\n"
    )
    result <- paste0(result, item_text)
  }

  result
} # /rtemis::show_col


================================================
FILE: R/utils_async.R
================================================
# utils_async.R
# ::rtemis::
# 2026- EDG rtemis.org

# Define allowed future plans
ALLOWED_PLANS <- c(
  "sequential",
  "multicore",
  "multisession",
  "cluster",
  "remote",
  "transparent",
  "future.mirai::mirai_multisession", # what user sets
  "mirai_multisession" # what future::plan() returns
)


#' Check if system is Windows
#'
#' @return Logical: TRUE if Windows, FALSE otherwise
#' @noRd
is_windows <- function() {
  tolower(Sys.info()[["sysname"]]) == "windows"
} # /is_windows


#' Identify future plan
#'
#' @return Character: Name of current plan
#'
#' @noRd
identify_plan <- function(x = NULL) {
  if (is.null(x)) {
    x <- future::plan()
  }
  for (p in ALLOWED_PLANS) {
    if (inherits(x, p)) {
      return(p)
    }
  }
  cli::cli_abort(
    "Detected future plan not in allowed plans ({.val {ALLOWED_PLANS}}). Detected plan class: {.val {class(x)}}"
  )
} # /rtemis::identify_plan


#' Set preferred plan
#'
#' Sets the future plan according to system and user preference:
#' - Check whether a plan has been set by the user
#' - Check whether there is an option set for future plan
#' - Check available cores
#' - Check if Windows
#'
#' @param requested_plan Optional character: Requested plan, one of "multicore", "multisession", "sequential".
#' @param n_workers Optional integer: Number of workers to use.
#'
#' @return Character: Name of plan set
#'
#' @author EDG
#' @keywords internal
#' @noRd
set_preferred_plan <- function(
  requested_plan = NULL,
  n_workers = NULL,
  envir = parent.frame(),
  verbosity = 1L
) {
  # If user has requested a specific plan, try to set it
  if (!is.null(requested_plan)) {
    # Security check
    if (!requested_plan %in% ALLOWED_PLANS) {
      cli::cli_abort(
        "Requested plan {.val {requested_plan}} is not one of allowed plans: {.val {ALLOWED_PLANS}}"
      )
    }
    # future::plan will determine workers if NULL & will set to sequential if only 1 core available
    # therefore plan set by following call is not always the requested one and needs to be
    # determined.

    if (requested_plan == "sequential") {
      with(
        future::plan(strategy = requested_plan),
        local = TRUE,
        envir = envir
      )
    } else {
      with(
        future::plan(strategy = requested_plan, workers = n_workers),
        local = TRUE,
        envir = envir
      )
    }

    return(identify_plan())
  }

  # If user has not requested a specific plan, check if they have set one
  current_plan <- future::plan()

  # If the plan is not sequential, we must assume user set it and respect it (though it might
  # have been set by a different package)
  if (!inherits(current_plan, "sequential")) {
    return(identify_plan(current_plan))
  }
  # If the plan is sequential, we can't currently tell if it was set by the user or is default
  # -> Ideally, we would know this. <-
  # We therefore proceed to set our preferred plan based on OS, n available cores, and requested
  # n workers.
  # If n_workers was set to 1 and no requested_plan was defined, use sequential
  if (!is.null(n_workers) && n_workers == 1L) {
    with(
      future::plan(strategy = "sequential"),
      local = TRUE,
      envir = envir
    )
    return("sequential")
  }

  if (is_windows()) {
    # On Windows, multicore is not available
    preferred_plan <- "multisession"
  } else {
    preferred_plan <- "multicore"
  }
  with(
    future::plan(strategy = preferred_plan, workers = n_workers),
    local = TRUE,
    envir = envir
  )
  # This will still be sequential and not "preferred_plan" if n_workers = 1
  identify_plan()
} # /set_preferred_plan


================================================
FILE: R/utils_checks.R
================================================
# utils_checks.R
# ::rtemis::
# 2024- EDG rtemis.org

# clean_* functions performm checks and return clean inputs.
# check_* functions perform checks (do not return a value).

# %% test_inherits ----
#' Check class of object
#'
#' @param x Object to check
#' @param cl Character: class to check against
#'
#' @return Logical
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' test_inherits("papaya", "character") # TRUE
#' test_inherits(c(1, 2.5, 3.2), "integer")
#' test_inherits(iris, "list") # FALSE, compare to is_check(iris, is.list)
test_inherits <- function(x, cl) {
  if (!inherits(x, cl)) {
    input <- deparse(substitute(x))
    message(red(bold(input), "is not", bold(cl)))
    return(FALSE)
  }
  TRUE
} # /rtemis::test_inherits


# %% check_inherits ----
#' Check class of object
#'
#' @param x Object to check.
#' @param cl Character: class to check against.
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#' check_inherits("papaya", "character")
#' # These will throw errors:
#' # check_inherits(c(1, 2.5, 3.2), "integer")
#' # check_inherits(iris, "list")
check_inherits <- function(
  x,
  cl,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!inherits(x, cl)) {
    cli::cli_abort(
      "{.var {xname}} must be of class {.cls {cl}}."
    )
  }

  invisible()
} # /rtemis::check_inherits


# %% clean_int ----
#' Clean integer input
#'
#' @details
#' The goal is to return an integer vector.
#' If the input is integer, it is returned as is.
#' If the input is numeric, it is coerced to integer only if the numeric values are integers,
#' otherwise an error is thrown.
#'
#' @param x Double or integer vector to check.
#'
#' @return Integer vector
#' @author EDG
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#' clean_int(6L)
#' clean_int(3)
#' # clean_int(12.1) # Error
#' clean_int(c(3, 5, 7))
#' # clean_int(c(3, 5, 7.01)) # Error
clean_int <- function(x, xname = deparse(substitute(x))) {
  if (is.integer(x)) {
    return(x)
  } else if (is.numeric(x)) {
    if (all(x %% 1 == 0)) {
      return(as.integer(x))
    } else {
      cli::cli_abort("{.var {xname}} must be integer.")
    }
  } else if (is.null(x)) {
    return(NULL)
  }
  cli::cli_abort("{.var {xname}} must be integer.")
} # /rtemis::clean_int


# %% match_arg ----
#' Match Arguments Ignoring Case
#'
#' @param x Character: Argument to match.
#' @param choices Character vector: Choices to match against.
#'
#' @return Character: Matched argument.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#' match_arg("papaya", c("AppleExtreme", "SuperBanana", "PapayaMaster"))
match_arg <- function(x, choices) {
  out <- match.arg(tolower(x), tolower(choices))
  grep(out, choices, value = TRUE, ignore.case = TRUE)
} # /rtemis::match_arg


# %% check_logical ----
#' Check logical
#'
#' @param x Vector to check
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#' @author EDG
#'
#' @keywords internal
#' @noRd
check_logical <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }
  if (!is.logical(x)) {
    cli::cli_abort("{.var {xname}} must be logical.")
  }

  invisible()
} # /rtemis::check_logical


# %% check_character ----
#' Check character
#'
#' @param x Vector to check
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_character <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }
  if (!is.character(x)) {
    cli::cli_abort("{.var {xname}} must be character.")
  }

  invisible()
} # /rtemis::check_character


# %% check_floatpos ----
#' Check positive float
#'
#' @details
#' Checking with `is.numeric()` allows integer inputs as well, which should be ok since it is
#' unlikely the function that consumes this will enforce double type only, but instead is most
#' likely to allow implicit coercion from integer to numeric.
#'
#' @param x Float vector.
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail, otherwise invisible().
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_floatpos <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort("{.var {xname}} must be numeric.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x <= 0)) {
    cli::cli_abort("{.var {xname}} must be greater than 0.")
  }

  invisible()
} # /rtemis::check_floatpos


# %% check_float01exc ----
#' Check float between 0 and 1, exclusive
#'
#' @param x Vector to check
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#' @examples
#' check_float01exc(0.5)
check_float01exc <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort("{.var {xname}} must be numeric.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x <= 0 | x >= 1)) {
    cli::cli_abort(
      "{.var {xname}} must be between 0 and 1, exclusive."
    )
  }

  invisible()
} # /rtemis::check_float01exc


# %% check_float01inc ----
#' Check float between 0 and 1, inclusive
#'
#' @param x Float vector.
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#' @examples
#' check_float01inc(0.5)
check_float01inc <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort(
      "{.var {xname}} must be numeric. Received: {.val {x}} of class {class(x)}",
      call. = FALSE
    )
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x < 0 | x > 1)) {
    cli::cli_abort("{.var {xname}} must be between 0 and 1, inclusive.")
  }

  invisible()
} # /rtemis::check_float01


# %% check_floatpos1 ----
check_floatpos1 <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort("{.var {xname}} must be numeric.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x <= 0) || any(x > 1)) {
    cli::cli_abort(
      "{.var {xname}} must be greater than 0 and less or equal to 1."
    )
  }

  invisible()
} # /rtemis::check_floatpos1


# %% clean_posint ----
#' Check positive integer
#'
#' @param x Integer vector.
#'
#' @return x, otherwise error.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' clean_posint(5)
clean_posint <- function(x, allow_na = FALSE, xname = deparse(substitute(x))) {
  if (is.null(x)) {
    return(NULL)
  }

  if (!allow_na && anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  } else {
    x <- na.exclude(x)
  }

  if (any(x <= 0)) {
    cli::cli_abort("{.var {xname}} must contain only positive integers.")
  }

  clean_int(x, xname = xname)
} # /rtemis::clean_posint


# %% check_float0pos ----
#' Check float greater than or equal to 0
#'
#' Checks if an input is a numeric vector containing non-negative
#'   (>= 0) values and no `NA`s. It is designed to validate function arguments.
#'
#' @param x Numeric vector: The input object to check.
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
check_float0pos <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort("{.var {xname}} must be numeric.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x < 0)) {
    cli::cli_abort("{.var {xname}} must be zero or greater.")
  }

  invisible()
} # /rtemis::check_float0pos


# %% check_float_neg1_1 ----
#' Check float -1 <= x <= 1
#'
#' @param x Numeric vector: The input object to check.
#' @param allow_null Logical: If TRUE, NULL values are allowed and return early.
#'
#' @return Called for side effects. Throws an error if checks fail.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
check_float_neg1_1 <- function(
  x,
  allow_null = TRUE,
  xname = deparse(substitute(x))
) {
  if (allow_null && is.null(x)) {
    return(invisible())
  }

  if (is.null(x)) {
    cli::cli_abort("{.var {xname}} cannot be NULL.")
  }

  if (!is.numeric(x)) {
    cli::cli_abort("{.var {xname}} must be numeric.")
  }

  if (anyNA(x)) {
    cli::cli_abort("{.var {xname}} must not contain NAs.")
  }

  if (any(x < -1 | x > 1)) {
    cli::cli_abort("{.var {xname}} must be between -1 and 1, inclusive.")
  }

  invisible()
} # /rtemis::check_float_neg1_1


# %% abbreviate_class ----
#' Abbreviate object class name
#'
#' @param x Object
#'
#' @return Character: Abbreviated class
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
abbreviate_class <- function(x, n = 4L) {
  paste0("<", abbreviate(class(x)[1], minlength = n), ">")
} # /rtemis::abbr_class


# %% check_dependencies ----
#' \pkg{rtemis} internal: Dependencies check
#'
#' Checks if dependencies can be loaded; names missing dependencies if not.
#'
#' @param ... List or vector of strings defining namespaces to be checked
#' @param verbosity Integer: Verbosity level.
#' Note: An error will always printed if dependencies are missing.
#' Setting this to FALSE stops it from printing
#' "Dependencies check passed".
#'
#' @return Called for side effects. Aborts and prints list of missing dependencies, if any.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
check_dependencies <- function(..., verbosity = 0L) {
  ns <- as.list(c(...))
  err <- !sapply(ns, \(i) requireNamespace(i, quietly = TRUE))
  if (any(err)) {
    cli::cli_abort(
      paste0(
        "Please install the following ",
        ngettext(sum(err), "dependency", "dependencies"),
        ":\n",
        pastels(ns[err], bullet = "    -")
      )
    )
  } else {
    if (verbosity > 0L) msg("Dependency check passed")
  }
  invisible()
} # /rtemis::check_dependencies


# %% check_data.table ----
#' Check data.table
#'
#' @param x Object to check.
#'
#' @return Called for side effects. Throws an error if input is not a data.table, returns x
#' invisibly otherwise.
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_data.table <- function(x, xname = deparse(substitute(x))) {
  if (!data.table::is.data.table(x)) {
    cli::cli_abort("{.var {xname}} must be a data.table.")
  }
  invisible(x)
} # /rtemis::check_data.table


# %% check_tabular ----
#' Check object is tabular
#'
#' Checks if object is of class `data.frame`, `data.table`, or `tbl_df`.
#'
#' @param x Object to check.
#'
#' @return Called for side effects. Throws an error if input is not tabular, returns x invisibly
#' otherwise.
#'
#' @author EDG
#' @keywords internal
#' @noRd
check_tabular <- function(x) {
  if (!inherits(x, c("data.frame", "data.table", "tbl_df"))) {
    cli::cli_abort(
      "{.var {deparse(substitute(x))}} must be a data.frame, data.table, or tbl_df."
    )
  }
  invisible(x)
} # /rtemis::check_tabular


================================================
FILE: R/utils_color.R
================================================
# utils_color.R
# ::rtemis::
# 2016- EDG rtemis.org

#' Simple Color Operations
#'
#' Invert a color or calculate the mean of two colors in HSV or RGB space.
#' This may be useful in creating colors for plots
#'
#' The average of two colors in RGB space will often pass through gray,
#' which is likely undesirable. Averaging in HSV space, better for most applications.
#' @param col Input color(s)
#' @param fn Character: "invert", "mean": Function to perform
#' @param space Character: "HSV", "RGB": Colorspace to operate in - for
#' averaging only
#'
#' @return Color
#'
#' @author EDG
#' @keywords internal
#' @noRd
color_op <- function(col, fn = c("invert", "mean"), space = c("HSV", "RGB")) {
  # Arguments ----
  fn <- match.arg(fn)
  space <- match.arg(space)

  # Colors ----
  col <- as.list(col)
  col.rgb <- col2rgb(col, alpha = TRUE)

  if (fn == "invert") {
    inverted <- apply(col.rgb, 2, \(i) 255 - i)
    # maintain alpha
    inverted[4, ] <- col.rgb[4, ]
    invertedl <- lapply(seq_len(NCOL(inverted)), \(i) {
      rgb(
        inverted[1, i],
        inverted[2, i],
        inverted[3, i],
        inverted[4, i],
        maxColorValue = 255
      )
    })
    if (!is.null(names(col))) {
      names(invertedl) <- paste0(names(col), ".invert")
    }
    return(invertedl)
  } else if (fn == "mean") {
    if (length(col) < 2) {
      cli::cli_abort("Need at least two colors to average")
    }
    if (space == "RGB") {
      averaged <- rowMeans(col.rgb)
      averaged <- rgb(
        averaged[1],
        averaged[2],
        averaged[3],
        averaged[4],
        maxColorValue = 255
      )
      return(list(average = averaged))
    } else if (space == "HSV") {
      # Convert HSV to RGB
      col.hsv <- rgb2hsv(col.rgb[1:3, ])
      # Get mean HSV values
      averaged <- rowMeans(col.hsv)
      # Get mean alpha from RGB
      alpha <- mean(col.rgb[4, ])
      # Turn to hex
      averaged <- hsv(averaged[1], averaged[2], averaged[3], alpha / 255)
      return(averaged)
    }
  }
} # /rtemis::color_op


#' Color to Grayscale
#'
#' Convert a color to grayscale
#'
#' Uses the NTSC grayscale conversion:
#' 0.299 * R + 0.587 * G + 0.114 * B
#'
#' @param x Color to convert to grayscale
#' @param what Character: "color" returns a hexadecimal color,
#' "decimal" returns a decimal between 0 and 1
#'
#' @return Character: color hex code.
#'
#' @author EDG
#' @export
#'
#' @examples
#' col2grayscale("red")
#' col2grayscale("red", "dec")
col2grayscale <- function(x, what = c("color", "decimal")) {
  what <- match.arg(what)
  col <- col2rgb(x)
  gs <- (0.299 * col[1, ] + 0.587 * col[2, ] + 0.114 * col[3, ]) / 255
  if (what == "color") {
    grDevices::gray(gs)
  } else {
    gs
  }
} # /rtemis::col2grayscale


#' Invert Color in RGB space
#'
#' @param x Color, vector
#'
#' @return Inverted colors using hexadecimal notation `#RRGGBBAA`.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' cols <- c("red", "green", "blue")
#' previewcolor(cols)
#' cols |>
#'   color_invertRGB() |>
#'   previewcolor()
color_invertRGB <- function(x) {
  col <- as.list(x)
  col_rgb <- col2rgb(col, alpha = TRUE)
  inverted <- apply(col_rgb, 2, \(i) 255 - i)
  # maintain alpha
  inverted[4, ] <- col_rgb[4, ]
  invertedl <- sapply(seq_len(NCOL(inverted)), \(i) {
    rgb(
      inverted[1, i],
      inverted[2, i],
      inverted[3, i],
      inverted[4, i],
      maxColorValue = 255
    )
  })
  if (!is.null(names(col))) {
    names(invertedl) <- paste0(names(col), ".invert")
  }
  invertedl
} # /rtemis::color_invertRGB


#' Fade color towards target
#'
#' @param x Color source
#' @param to Target color
#' @param pct Numeric (0, 1) fraction of the distance in RGBA space between
#' `x` and `to` to move. e.g. .5 gets the mean RGBA value of the two
#'
#' @return Color in hex notation
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' previewcolor(c("red", color_fade("red", "blue", .5), "blue"))
color_fade <- function(x, to = "#000000", pct = .5) {
  col <- col2rgb(x, alpha = TRUE)
  col2 <- col2rgb(to, alpha = TRUE)
  d <- (col2 - col) * pct
  colf <- (col + d) / 255
  rgb(colf[1], colf[2], colf[3], colf[4])
}


#' Pastelify a color (make a color more pastel)
#'
#' Lower a color's saturation by a given percent in the HSV color system
#'
#' @param x Color vector: Color(s) to operate on
#' @param s Float: Decrease saturation by this fraction. For example, if `s = 0.3` and saturation of
#' input color is 1, it will become 0.7.
#'
#' @return Character vector with hex codes of modified colors.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' cols <- c("red", "green", "blue")
#' previewcolor(cols)
#' cols_d <- desaturate(cols)
#' previewcolor(cols_d)
desaturate <- function(x, s = 0.3) {
  # Infer color names, if available
  if (!is.null(names(x))) {
    .names <- names(x)
  } else if (is.character(x)) {
    .names <- x
  } else {
    .names <- NULL
  }

  x <- lapply(x, col2rgb)
  x <- lapply(x, rgb2hsv)
  xp <- lapply(x, function(i) {
    .s <- i[2]
    i[2] <- .s - (.s * s)
    hsv(i[1], i[2], i[3])
  })

  names(xp) <- .names
  unlist(xp)
} # /rtemis::desaturate


#' Convert R color to hexadecimal code
#'
#' Convert a color that R understands into the corresponding hexadecimal code
#'
#' @param color Color(s) that R understands
#'
#' @return Character vector of hexadecimal codes.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' col2hex(c("gray50", "skyblue"))
col2hex <- function(color) {
  .rgb <- col2rgb(color)
  sapply(seq_along(color), function(i) {
    paste0(
      "#",
      paste0(
        sprintf(
          "%02s",
          c(
            as.character(as.hexmode(.rgb[1, i])),
            as.character(as.hexmode(.rgb[2, i])),
            as.character(as.hexmode(.rgb[3, i]))
          )
        ),
        collapse = ""
      )
    )
  })
} # /rtemis::col2hex


#' Adjust HSV Color
#'
#' Modify alpha, hue, saturation and value (HSV) of a color
#'
#' @param color Input color. Any format that grDevices::col2rgb() recognizes
#' @param alpha Numeric: Scale alpha by this amount. Future: replace with absolute setting
#' @param hue Float: How much hue to add to `color`
#' @param sat Float: How much saturation to add to `color`
#' @param val Float: How much to increase value of `color` by
#'
#' @return Adjusted color
#'
#' @author EDG
#' @export
#'
#' @examples
#' previewcolor(c(teal = "#00ffff", teal50 = color_adjust("#00ffff", alpha = 0.5)))
color_adjust <- function(color, alpha = NULL, hue = 0, sat = 0, val = 0) {
  ac <- color
  # HSV ----
  ac.hsv <- grDevices::rgb2hsv(grDevices::col2rgb(ac))
  ac <- grDevices::hsv(ac.hsv[1] + hue, ac.hsv[2] + sat, ac.hsv[3] + val)
  # alpha ----
  if (!is.null(alpha)) {
    ac <- adjustcolor(ac, alpha.f = alpha)
  }
  ac
} # /rtemis::color_adjust


#' Preview color
#'
#' Preview one or multiple colors using little rhombi with their little labels up top
#'
#' @param x Color, vector: One or more colors that R understands
#' @param main Character: Title. Default = NULL, which results in
#' `deparse(substitute(x))`
#' @param bg Background color.
#' @param main_col Color: Title color
#' @param main_x Float: x coordinate for `main`.
#' @param main_y Float: y coordinate for `main`.
#' @param main_adj Float: `adj` argument to mtext for `main`.
#' @param main_cex Float: character expansion factor for `main`.
#' @param main_font Integer, 1 or 2: Weight of `main` 1: regular, 2: bold.
#' @param width Float: Plot width. Default = NULL, i.e. set automatically
#' @param xlim Vector, length 2: x-axis limits. Default = NULL, i.e. set automatically
#' @param ylim Vector, length 2: y-axis limits.
#' @param asp Float: Plot aspect ratio.
#' @param labels_y Float: y coord for labels. Default = 1.55 (rhombi are fixed and range y .5 - 1.5)
#' @param label_cex Float: Character expansion for labels. Default = NULL, and is
#' calculated automatically based on length of `x`
#' @param mar Numeric vector, length 4: margin size.
#' @param filename Character: Path to save plot as PDF.
#' @param pdf_width Numeric: Width of PDF in inches.
#' @param pdf_height Numeric: Height of PDF in inches.
#'
#' @return Nothing, prints plot.
#'
#' @author EDG
#' @export
#'
#' @examples
#' previewcolor(get_palette("rtms"))
previewcolor <- function(
  x,
  main = NULL,
  bg = "#333333",
  main_col = "#b3b3b3",
  main_x = .7,
  main_y = 0.2,
  main_adj = 0,
  main_cex = .9,
  main_font = 2,
  width = NULL,
  xlim = NULL,
  ylim = c(0, 2.2),
  asp = 1,
  labels_y = 1.55,
  label_cex = NULL,
  mar = c(0, 0, 0, 1),
  filename = NULL,
  pdf_width = 8,
  pdf_height = 2.5
) {
  if (is.null(main)) {
    main <- deparse(substitute(x))
  }
  x <- unlist(x)

  par_orig <- par(no.readonly = TRUE)
  on.exit(par(par_orig))

  if (is.null(width)) {
    width <- max(3, .3 * length(x))
  }
  if (is.null(xlim)) {
    xlim <- c(0.3, width + .7)
  }
  if (!is.null(filename)) {
    grDevices::pdf(filename, pdf_width, pdf_height)
  }
  par(bg = bg, mar = mar, oma = c(0, 0, 0, 0), xaxs = "i", yaxs = "i")

  # Plot ----
  plot(
    NULL,
    NULL,
    asp = asp,
    axes = FALSE,
    xlim = xlim,
    ylim = ylim,
    xlab = NA,
    ylab = NA
  )

  if (length(x) >= 3) {
    xmid <- seq(1, width, length.out = length(x))
  } else if (length(x) == 2) {
    xmid <- c(.3333 * width, .6666 * width) + .5
  } else {
    xmid <- .5 * width + .5
  }

  for (i in seq(x)) {
    rhombus(xmid[i], 1, col = x[i])
  }

  # '- Labels ----
  if (is.null(label_cex)) {
    label_cex <- 1.30 - .02 * length(x)
    label_cex <- 1.314869 - 0.009163 * length(x)
  }

  if (is.null(names(x))) {
    labels <- as.character(x)
  } else {
    labels <- names(x)
  }
  text(
    xmid + .1,
    labels_y,
    labels,
    col = x,
    srt = 45,
    adj = 0,
    offset = 0,
    cex = label_cex,
    xpd = TRUE
  )

  # '- Title ----
  if (!is.null(main)) {
    text(
      main_x,
      main_y,
      main,
      col = main_col,
      adj = main_adj,
      font = main_font,
      cex = main_cex
    )
  }

  if (!is.null(filename)) {
    dev.off()
  }
} # /rtemis::previewcolor


rhombus <- function(
  xmid = 1,
  ymid = 1,
  width = 1,
  height = 1,
  col = "#80FFFF"
) {
  # left, top, right, bottom
  hw <- .5 * width
  hh <- .5 * height
  polygon(
    x = c(xmid - hw, xmid, xmid + hw, xmid),
    y = c(ymid, ymid + hh, ymid, ymid - hh),
    col = col,
    border = NA
  )
} # /rtemis::rhombus


#' Color Gradient
#'
#' Create a gradient of colors and optionally a colorbar
#'
#' It is best to provide an odd number, so that there is always an equal number of colors on either side
#' of the midpoint.
#' For example, if you want a gradient from -1 to 1 or equivalent, an n = 11, will give 5 colors on either
#' side of 0, each representing a 20\% change from the next.
#'
#' `colors` can be defined as a sequence of 3-letter color abbreviations of 2, 3, 4, or 5 colors
#'   which will correspond to values: \{"lo","hi"\}; \{"lo", "mid", "hi"\}; \{"lo", "mid", "midhi", "hi"\}, and
#'   \{"lo", "lomid", "mid", "midhi", "hi"\}, respectively.
#'   For example, try `colorgrad(21, "blugrnblkredyel", colorbar = TRUE)`
#'   3-letter color abbreviations:
#'    wht: white; blk: black; red; grn: green; blu: blue; yel: yellow; rng: orange; prl: purple
#'
#' @param n Integer: How many distinct colors you want. If not odd, converted to `n + 1`
#'   Defaults to 21
#' @param colors Character: Acts as a shortcut to defining `lo`, `mid`, etc for a number of defaults:
#'   "french", "penn", "grnblkred",
#' @param space Character: Which colorspace to use. Option: "rgb", or "Lab".
#'   Recommendation: If `mid` is "white" or "black" (default), use "rgb", otherwise "Lab"
#' @param lo Color for low end
#' @param lomid Color for low-mid
#' @param mid Color for middle of the range or "mean", which will result in `color_op(c(lo, hi), "mean")`.
#' If `mid = NA`, then only `lo` and `hi` are used to create the color gradient.
#' @param midhi Color for middle-high
#' @param hi Color for high end
#' @param preview Logical: Plot the colors horizontally
#' @param cb_n Integer: How many steps you would like in the colorbar
#' @param bar_min Numeric: Lowest value in colorbar
#' @param bar_mid Numeric: Middle value in colorbar
#' @param bar_max Numeric: Max value in colorbar
#' @param cex Float: Character expansion for axis
#' @param theme Theme object.
#' @param bg Color: Background color
#' @param col_text Color: Colorbar text color
#' @param plotlycb Logical: Create colorbar using `plotly` (instead of base R graphics)
#' @param plotly_width Float: Width for plotly colorbar.
#' @param plotly_height Float: Height for plotly colorbar.
#' @param return_plotly Logical: If TRUE, return `plotly` object
#' @param margins Vector: Plotly margins.
#' @param pad Float: Padding for `plotly`.
#'
#' @return Invisible vector of hexadecimal colors / plotly object if `return_plotly = TRUE`
#'
#' @author EDG
#' @keywords internal
#' @noRd
colorgrad <- function(
  n = 21L,
  colors = NULL,
  space = c("rgb", "Lab"),
  lo = rt_teal,
  lomid = NULL,
  mid = NULL,
  midhi = NULL,
  hi = rt_orange,
  preview = FALSE,
  cb_n = 21L,
  bar_min = -1,
  bar_mid = 0,
  bar_max = 1,
  cex = 1.2,
  theme = choose_theme(getOption("rtemis_theme")),
  bg = NULL,
  col_text = NULL,
  plotlycb = FALSE,
  plotly_width = 80,
  plotly_height = 500,
  return_plotly = FALSE,
  margins = c(0, 0, 0, 0),
  pad = 0L
) {
  # Arguments ----
  n <- as.integer(n)
  if (n %% 2 != 1) {
    n <- n + 1
  }

  if (return_plotly) {
    plotlycb <- TRUE
  }
  if (is.null(cb_n)) {
    cb_n <- n
    if (cb_n %% 2 != 1) cb_n <- cb_n + 1
  }
  space <- match.arg(space)
  theme <- if (strtrim(theme@name, 4) %in% c("dark", "blac")) {
    "dark"
  } else {
    "light"
  }

  # Colors ----
  if (!is.null(colors)) {
    if (colors == "french") {
      lo <- "#01256E"
      lomid <- NULL
      mid <- "white"
      midhi <- NULL
      hi <- "#95001A"
    } else if (colors == "penn") {
      lo <- "#02CFFF"
      lomid <- NULL
      mid <- "#01256E"
      midhi <- "#95001A"
      hi <- "#F2C100"
    } else if (colors == "blues") {
      lo <- "#01256E"
      mid <- NULL
      hi <- "#82AFD3"
    } else if (colors == "greens") {
      lo <- "#005200"
      mid <- NULL
      hi <- "#80DF80"
    } else {
      cols <- colorvec(cols = colors)
      lo <- cols$lo
      lomid <- cols$lomid
      mid <- cols$mid
      midhi <- cols$midhi
      hi <- cols$hi
    }
  }

  # Grad ----
  n <- as.integer(n)
  midpoint <- ceiling(n / 2)
  if (is.null(mid)) {
    mid <- ifelse(theme == "light", "white", "black")
  }
  if (!is.na(mid)) {
    if (mid == "mean") {
      mid <- color_op(c(lo, hi), "mean")
    }
    lo2mid <- colorRampPalette(c(lo, lomid, mid), space = space)
    mid2hi <- colorRampPalette(c(mid, midhi, hi), space = space)
    grad <- c(lo2mid(midpoint), mid2hi(n - midpoint + 1)[-1])
  } else {
    grad <- colorRampPalette(c(lo, hi), space = space)(n)
  }

  if (cb_n != n) {
    cb_n <- as.integer(cb_n)
    cb_midpoint <- ceiling(cb_n / 2)
    # if (is.null(mid)) mid <- color_op(c(lo, hi), "mean")
    # lo2mid <- grDevices::colorRampPalette(c(lo, lomid, mid), space = space)
    # mid2hi <- grDevices::colorRampPalette(c(mid, midhi, hi), space = space)
    if (!is.na(mid)) {
      cb_grad <- c(lo2mid(cb_midpoint), mid2hi(cb_n - cb_midpoint + 1)[-1])
    } else {
      cb_grad <- colorRampPalette(c(lo, hi), space = space)(cb_n)
    }
  } else {
    cb_grad <- grad
    cb_midpoint <- midpoint
  }

  # Preview ----
  if (preview) {
    plot(
      rep(1, n),
      col = grad,
      pch = 19,
      cex = 6,
      xlim = c(0.5, n + .5),
      ylim = c(.8, 1.2),
      ann = FALSE,
      axes = FALSE
    )
    text(
      x = 0.25,
      y = 1.05,
      labels = paste0("Color gradient (n = ", n, ")"),
      adj = 0,
      cex = 1.5
    )
    segments(midpoint, .95, midpoint, 1.05, lwd = 2, lty = 2, col = NA)
  }

  # Plotly cb ----
  if (plotlycb) {
    requireNamespace("plotly")

    m <- list(
      size = 40,
      color = grad,
      opacity = 1,
      symbol = "circle"
    )

    x.ax <- list(
      title = "",
      zeroline = FALSE,
      showline = FALSE,
      showticklabels = FALSE,
      showgrid = FALSE,
      range = c(0.8, 1.4)
    )

    y.ax <- list(
      title = "",
      zeroline = FALSE,
      showline = FALSE,
      showticklabels = FALSE,
      showgrid = FALSE
    )

    t <- list(
      family = "Open Sans",
      size = 22,
      color = plotly::toRGB("black")
    )

    a <- list()
    for (i in 1:3) {
      a[[i]] <- list(
        x = 1.3,
        y = c(1, midpoint, n)[i],
        text = as.character(c(bar_min, bar_mid, bar_max))[i],
        xref = "x",
        yref = "y",
        showarrow = FALSE
      )
    }

    hovtext <- ddSci(seq(bar_min, bar_max, (bar_max - bar_min) / (n - 1)))

    margin <- list(
      b = margins[1],
      l = margins[2],
      t = margins[3],
      r = margins[4],
      pad = pad
    )

    p <- plotly::plot_ly(
      x = rep(1, n),
      y = 1:n,
      type = "scatter",
      mode = "markers",
      marker = m,
      hoverinfo = "text",
      text = hovtext
    ) |>
      plotly::layout(
        xaxis = x.ax,
        yaxis = y.ax,
        width = plotly_width,
        height = plotly_height,
        annotations = a,
        font = t,
        margin = margin
      ) |>
      plotly::config(displayModeBar = FALSE)
    if (plotlycb && !return_plotly) print(p)
  }

  # out ----
  if (return_plotly) {
    return(p)
  }
  invisible(grad)
} # /rtemis::colorgrad


# 3-letter Color Name Abbreviations
# wht white
# blk black
# red
# grn green
# blu blue
# yel yellow
# rng orange
# prl purple

colorvec <- function(cols) {
  if (nchar(cols) %% 3 != 0) {
    cli::cli_abort(
      "All colors must be specified by their 3-letter abbreviations"
    )
  }

  cols <- tolower(cols)
  ncols <- nchar(cols) / 3
  cols <- lapply(seq(ncols), function(i) substr(cols, i * 3 - 2, i * 3))

  coldf <- data.frame(
    abbr = c("wht", "red", "grn", "blu", "blk", "yel", "rng", "prl"),
    name = c(
      "white",
      "red",
      "green",
      "blue",
      "black",
      "yellow",
      "orange",
      "purple"
    ),
    stringsAsFactors = FALSE
  )

  cols <- sapply(1:ncols, function(i) coldf[coldf[["abbr"]] == cols[i], 2])

  lo <- lomid <- mid <- midhi <- hi <- NULL
  collist <- list(
    twocols = c("lo", "hi"),
    threecols = c("lo", "mid", "hi"),
    fourcols = c("lo", "mid", "midhi", "hi"),
    fivecols = c("lo", "lomid", "mid", "midhi", "hi")
  )

  for (i in seq(ncols)) {
    assign(collist[[ncols - 1]][i], cols[i])
  }
  list(lo = lo, lomid = lomid, mid = mid, midhi = midhi, hi = hi)
}

autoalpha <- function(x, gamma = .0008, min = .3) {
  max(min, 1 - x * gamma)
}


================================================
FILE: R/utils_data.R
================================================
# utils_data.R
# ::rtemis::
# EDG rtemis.org

# %% Public ----------------------------------------------------------------------------------------

#' Describe factor
#'
#' Outputs a single character with names and counts of each level of the input factor.
#'
#' @param x factor.
#' @param ... See details.
#'
#' @details
#' Extra arguments:
#' - `max_n`: Integer: Return counts for up to this many levels.
#' - `return_ordered`: Logical: If TRUE, return levels ordered by count, otherwise return in level order.
#' - `verbosity`: Integer: Verbosity level.
#'
#' @return Character with level counts.
#'
#' @author EDG
#' @noRd
#'
#' @examples
#' # Small number of levels
#' describe(iris[["Species"]])
#'
#' # Large number of levels: show top n by count
#' x <- factor(sample(letters, 1000, TRUE))
#' describe(x)
#' describe(x, 3)
#' describe(x, 3, return_ordered = FALSE)
method(describe, class_factor) <- function(
  x,
  max_n = 5,
  return_ordered = TRUE,
  verbosity = 1L
) {
  x <- factor(x)
  x_levels <- levels(x)
  n_unique <- length(x_levels)
  x_freqs <- as.integer(table(x))
  if (return_ordered) {
    idi <- order(x_freqs, decreasing = TRUE)
  }

  if (n_unique <= max_n) {
    if (return_ordered) {
      out <- paste(x_levels[idi], x_freqs[idi], sep = ": ", collapse = "; ")
    } else {
      out <- paste(x_levels, x_freqs, sep = ": ", collapse = "; ")
    }
  } else {
    idi <- order(x_freqs, decreasing = TRUE)
    if (return_ordered) {
      idi <- idi[seq_len(max_n)]
      out <- paste0(
        "(Top ",
        max_n,
        " of ",
        n_unique,
        ") ",
        paste(x_levels[idi], x_freqs[idi], sep = ": ", collapse = "; ")
      )
    } else {
      idx <- seq_len(max_n)
      out <- paste0(
        "(First ",
        max_n,
        " of ",
        n_unique,
        ") ",
        paste(x_levels[idx], x_freqs[idx], sep = ": ", collapse = "; ")
      )
    }
  }
  if (verbosity > 0L) {
    print(out)
  }
  invisible(out)
} # /rtemis::describe.factor


#' Match cases by covariates
#'
#' Find one or more cases from a `pool` data.frame that match cases in a target
#' data.frame. Match exactly and/or by distance (sum of squared distances).
#'
#' @param target data.frame you are matching against.
#' @param pool data.frame you are looking for matches from.
#' @param n_matches Integer: Number of matches to return.
#' @param target_id Character: Column name in `target` that holds unique
#' cases IDs. Default = NULL, in which case integer case numbers will be used.
#' @param pool_id Character: Same as `target_id` for `pool`.
#' @param exactmatch_factors Logical: If TRUE, selected cases will have to
#' exactly match factors available in `target`.
#' @param exactmatch_cols Character: Names of columns that should be matched
#' exactly.
#' @param distmatch_cols Character: Names of columns that should be
#' distance-matched.
#' @param norepeats Logical: If TRUE, cases in `pool` can only be chosen
#' once.
#' @param ignore_na Logical: If TRUE, ignore NA values during exact matching.
#' @param verbosity Integer: Verbosity level.
#'
#' @return data.frame
#'
#' @author EDG
#' @export
#'
#' @examples
#' set.seed(2021)
#' cases <- data.frame(
#'   PID = paste0("PID", seq(4)),
#'   Sex = factor(c(1, 1, 0, 0)),
#'   Handedness = factor(c(1, 1, 0, 1)),
#'   Age = c(21, 27, 39, 24),
#'   Var = c(.7, .8, .9, .6),
#'   Varx = rnorm(4)
#' )
#' controls <- data.frame(
#'   CID = paste0("CID", seq(50)),
#'   Sex = factor(sample(c(0, 1), 50, TRUE)),
#'   Handedness = factor(sample(c(0, 1), 50, TRUE, c(.1, .9))),
#'   Age = sample(16:42, 50, TRUE),
#'   Var = rnorm(50),
#'   Vary = rnorm(50)
#' )
#'
#' mc <- matchcases(cases, controls, 2, "PID", "CID")
matchcases <- function(
  target,
  pool,
  n_matches = 1,
  target_id = NULL,
  pool_id = NULL,
  exactmatch_factors = TRUE,
  exactmatch_cols = NULL,
  distmatch_cols = NULL,
  norepeats = TRUE,
  ignore_na = FALSE,
  verbosity = 1L
) {
  ntarget <- nrow(target)
  npool <- nrow(pool)

  # Get IDs
  if (is.null(target_id)) {
    targetID <- seq(ntarget)
  } else {
    targetID <- target[, target_id]
    target[, target_id] <- NULL
  }
  if (is.null(pool_id)) {
    poolID <- seq(npool)
  } else {
    poolID <- pool[, pool_id]
    pool[, pool_id] <- NULL
  }

  # exact- & dist-matched column names
  if (is.null(exactmatch_cols) && exactmatch_factors) {
    exactmatch_cols <- colnames(target)[sapply(target, is.factor)]
  }
  # Keep exactmatch_cols present in pool
  exactmatch_cols <- exactmatch_cols[exactmatch_cols %in% colnames(pool)]

  if (is.null(distmatch_cols)) {
    distmatch_cols <- colnames(target)[!colnames(target) %in% exactmatch_cols]
  }
  # Keep distmatch_cols present in pool
  distmatch_cols <- distmatch_cols[distmatch_cols %in% colnames(pool)]

  # Remove unused columns, if any
  .remove <- colnames(target)[
    !colnames(target) %in% c(exactmatch_cols, distmatch_cols)
  ]
  target[, .remove] <- NULL
  .remove <- colnames(pool)[
    !colnames(pool) %in% c(exactmatch_cols, distmatch_cols)
  ]
  pool[, .remove] <- NULL

  # Convert all non-exact-matching to numeric
  tonumeric <- distmatch_cols[!sapply(target[, distmatch_cols], is.numeric)]
  if (length(tonumeric) > 0) {
    target[, tonumeric] <- lapply(target[, tonumeric, drop = FALSE], as.numeric)
  }
  tonumeric <- distmatch_cols[!sapply(pool[, distmatch_cols], is.numeric)]
  if (length(tonumeric) > 0) {
    pool[, tonumeric] <- lapply(pool[, tonumeric, drop = FALSE], as.numeric)
  }

  # Normalize all
  vcat <- rbind(target, pool)
  vcat[, distmatch_cols] <- lapply(vcat[, distmatch_cols, drop = FALSE], scale)
  target_s <- cbind(targetID = targetID, vcat[seq(ntarget), ])
  pool_s <- cbind(poolID = poolID, vcat[-seq(ntarget), ])
  rm(vcat)

  # For each target, select matches on categoricals,
  # then order pool by distance.
  mc <- data.frame(targetID = targetID, match = matrix(NA, ntarget, n_matches))
  for (i in seq(ntarget)) {
    if (verbosity > 0L) {
      msg("Working on case", i, "of", ntarget)
    }
    if (is.null(exactmatch_cols)) {
      subpool <- pool_s
    } else {
      ind <- sapply(seq_len(nrow(pool_s)), function(j) {
        all(
          target_s[i, exactmatch_cols] == pool_s[j, exactmatch_cols],
          na.rm = ignore_na
        )
      })
      subpool <- pool_s[ind, , drop = FALSE]
    }
    distord <- order(sapply(
      seq_len(nrow(subpool)),
      function(j) {
        mse(
          unlist(target_s[i, distmatch_cols]),
          unlist(subpool[j, distmatch_cols]),
          na.rm = ignore_na
        )
      }
    ))
    n_matched <- min(n_matches, nrow(subpool))
    mc[i, 2:(n_matched + 1)] <- subpool[, 1][distord[seq(n_matched)]]
    if (norepeats) {
      pool_s <- pool_s[!pool_s[, 1] %in% mc[i, 2:(n_matches + 1)], ]
    }
  }

  mc
} # /rtemis::matchcases


#' Index columns by attribute name & value
#'
#' @param x tabular data.
#' @param name Character: Name of attribute.
#' @param value Character: Value of attribute.
#' @param exact Logical: Passed to `attr` when retrieving attribute value. If `TRUE`, attribute
#' name must match `name` exactly, otherwise, partial match is allowed.
#'
#' @return Integer vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   id = 1:5,
#'   sbp = rnorm(5, 120, 15),
#'   dbp = rnorm(5, 80, 10),
#'   paO2 = rnorm(5, 90, 10),
#'   paCO2 = rnorm(5, 40, 5)
#' )
#' setattr(x[["sbp"]], "source", "outpatient")
#' setattr(x[["dbp"]], "source", "outpatient")
#' setattr(x[["paO2"]], "source", "icu")
#' setattr(x[["paCO2"]], "source", "icu")
#' index_col_by_attr(x, "source", "icu")
index_col_by_attr <- function(x, name, value, exact = TRUE) {
  colattr <- lapply(x, \(i) attr(i, name, exact = exact))
  # Convert to character vector maintaining NULL values (where attribute is not set)
  colattr <- sapply(colattr, function(i) {
    if (is.null(i)) NA_character_ else as.character(i)
  })
  which(colattr == value)
} # /rtemis.utils::index_col_by_attr


#' Tabulate column attributes
#'
#' @param x tabular data: Input data set.
#' @param attr Character: Attribute to get
#' @param useNA Character: Passed to `table`
#'
#' @return table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   id = 1:5,
#'   sbp = rnorm(5, 120, 15),
#'   dbp = rnorm(5, 80, 10),
#'   paO2 = rnorm(5, 90, 10),
#'   paCO2 = rnorm(5, 40, 5)
#' )
#' setattr(x[["sbp"]], "source", "outpatient")
#' setattr(x[["dbp"]], "source", "outpatient")
#' setattr(x[["paO2"]], "source", "icu")
#' setattr(x[["paCO2"]], "source", "icu")
#' table_column_attr(x, "source")
table_column_attr <- function(x, attr = "source", useNA = "always") {
  attrs <- sapply(x, \(i) {
    if (is.null(attr(i, attr, exact = TRUE))) {
      NA_character_
    } else {
      attr(i, attr, exact = TRUE)
    }
  })
  table(attrs, useNA = useNA)
} # /rtemis::table_column_attr


#' List column names by class
#'
#' @param x tabular data.
#' @param sorted Logical: If TRUE, sort the output
#' @param item_format Function: Function to format each item
#' @param maxlength Integer: Maximum number of items to print
#'
#' @return `NULL`, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' names_by_class(iris)
names_by_class <- function(
  x,
  sorted = TRUE,
  item_format = highlight,
  maxlength = 24
) {
  classes <- sapply(x, class)
  vals <- unique(classes)
  out <- if (sorted) {
    sapply(vals, \(i) sort(names(x)[classes == i]))
  } else {
    sapply(vals, \(i) names(x)[classes == i])
  }
  cat(repr_ls(out, item_format = item_format, maxlength = maxlength))
  invisible()
} # /rtemis::names_by_class


#' Inspect character and factor vector
#'
#' Checks character or factor vector to determine whether it might be best to convert to
#' numeric.
#'
#' @details
#' All data can be represented as a character string. A numeric variable may be read as
#' a character variable if there are non-numeric characters in the data.
#' It is important to be able to automatically detect such variables and convert them,
#' which would mean introducing NA values.
#'
#' @param x Character or factor vector.
#' @param xname Character: Name of input vector `x`.
#' @param verbosity Integer: Verbosity level.
#' @param thresh Numeric: Threshold for determining whether to convert to numeric.
#' @param na.omit Logical: If TRUE, remove NA values before checking.
#'
#' @return Character.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- c("3", "5", "undefined", "21", "4", NA)
#' inspect_type(x)
#' z <- c("mango", "banana", "tangerine", NA)
#' inspect_type(z)
inspect_type <- function(
  x,
  xname = NULL,
  verbosity = 1L,
  thresh = .5,
  na.omit = TRUE
) {
  if (is.null(xname)) {
    xname <- deparse(substitute(x))
  }
  if (na.omit) {
    x <- na.omit(x)
  }
  xclass <- class(x)[1]
  xlen <- length(x)
  raw_na <- sum(is.na(x))
  n_non_na <- xlen - raw_na
  # char_na <- sum(is.na(as.character(x)))
  suppressWarnings({
    num_na <- if (xclass == "character") {
      sum(is.na(as.numeric(x)))
    } else {
      sum(is.na(as.numeric(as.character(x))))
    }
  })
  if (raw_na == xlen) {
    "NA"
  } else if (
    xclass %in% c("character", "factor") && (num_na / n_non_na) < thresh
  ) {
    if (verbosity > 0L) {
      msg0(
        "Possible type error: ",
        highlight(xname),
        " is a ",
        bold(xclass),
        ", but perhaps should be ",
        bold("numeric"),
        "."
      )
    }
    "numeric"
  } else {
    xclass
  }
} # /rtemis::inspect_type


================================================
FILE: R/utils_data.table.R
================================================
# utils_data.table.R
# ::rtemis::
# 2022- EDG rtemis.org

#' Number of unique values per feature
#'
#' @param x data.table: Input data.table.
#' @param excludeNA Logical: If TRUE, exclude NA values.
#' @param limit Integer: Print up to this many features. Set to -1L to print all.
#' @param verbosity Integer: If > 0, print output to console.
#'
#' @return Named integer vector of length `NCOL(x)` with number of unique values per column/feature, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' ir <- as.data.table(iris)
#' dt_nunique_perfeat(ir)
dt_nunique_perfeat <- function(
  x,
  excludeNA = FALSE,
  limit = 20L,
  verbosity = 1L
) {
  stopifnot(inherits(x, "data.table"))
  nupf <- sapply(x, \(i) data.table::uniqueN(i, na.rm = excludeNA))
  if (verbosity > 0L) {
    printls(nupf, item_format = thin, limit = limit, print_class = FALSE)
  }
  invisible(nupf)
} # /rtemis::dt_nunique_perfeat


#' Long to wide key-value reshaping
#'
#' Reshape a long format `data.table` using key-value pairs with
#' `data.table::dcast`
#'
#' @param x `data.table` object.
#' @param id_name Character: Name of column in `x` that defines the IDs
#' identifying individual rows.
#' @param key_name Character: Name of column in `x` that holds the key.
#' @param positive Numeric or Character: Used to fill id ~ key combination
#' present in the long format input `x`.
#' @param negative Numeric or Character: Used to fill id ~ key combination
#' NOT present in the long format input `x`.
#' @param xname Character: Name of `x` to be used in messages.
#' @param verbosity Integer: Verbosity level.
#'
#' @return `data.table` in wide format.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   ID = rep(1:3, each = 2),
#'   Dx = c("A", "C", "B", "C", "D", "A")
#' )
#' dt_keybin_reshape(x, id_name = "ID", key_name = "Dx")
dt_keybin_reshape <- function(
  x,
  id_name,
  key_name,
  positive = 1,
  negative = 0,
  xname = NULL,
  verbosity = 1L
) {
  if (is.null(xname)) {
    xname <- deparse(substitute(x))
  }
  stopifnot(inherits(x, "data.table"))
  x <- copy(x)

  # Assign positive value to all in long form
  value_name <- "Bin__"
  x[, (value_name) := positive]

  .formula <- as.formula(paste(
    paste(id_name, collapse = " + "),
    "~",
    key_name
  ))
  if (verbosity > 0L) {
    msg("Reshaping", highlight(xname), "to wide format...")
    catsize(x, "Input size")
  }
  # Reshape to wide, filling all absent with negative value
  x <- dcast(
    x,
    .formula,
    fun.aggregate = length,
    value.var = value_name,
    drop = FALSE,
    fill = negative
  )

  if (verbosity > 0L) {
    catsize(x, "Output size")
  }
  x
} # /rtemis::dt_keybin_reshape


#' Merge data.tables
#'
#' @param left data.table
#' @param right data.table
#' @param on Character: Name of column to join on.
#' @param left_on Character: Name of column on left table.
#' @param right_on Character: Name of column on right table.
#' @param how Character: Type of join: "inner", "left", "right", "outer".
#' @param left_name Character: Name of left table.
#' @param right_name Character: Name of right table.
#' @param left_suffix Character: If provided, add this suffix to all left column names,
#' excluding on/left_on.
#' @param right_suffix Character: If provided, add this suffix to all right column names,
#' excluding on/right_on.
#' @param verbosity Integer: Verbosity level.
#' @param ... Additional arguments to be passed to `data.table::merge`.
#'
#' @return Merged data.table.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' xleft <- data.table(ID = 1:5, Alpha = letters[1:5])
#' xright <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6])
#' xlr_inner <- dt_merge(xleft, xright, on = "ID", how = "inner")
dt_merge <- function(
  left,
  right,
  on = NULL,
  left_on = NULL,
  right_on = NULL,
  how = "left",
  left_name = NULL,
  right_name = NULL,
  left_suffix = NULL,
  right_suffix = NULL,
  verbosity = 1L,
  ...
) {
  if (is.null(left_name)) {
    left_name <- deparse(substitute(left))
  }
  if (is.null(right_name)) {
    right_name <- deparse(substitute(right))
  }
  if (is.null(left_on)) {
    left_on <- on
  }
  if (is.null(right_on)) {
    right_on <- on
  }
  if (verbosity > 0L) {
    icon <- switch(
      how,
      inner = "\u2A1D",
      left = "\u27D5",
      right = "\u27D6",
      "\u27D7"
    )
    if (left_on == right_on) {
      msg0(
        bold(highlight(icon)),
        " Merging ",
        highlight(left_name),
        " & ",
        highlight(right_name),
        " on ",
        highlight(left_on),
        "..."
      )
    } else {
      msg0(
        bold(highlight(icon)),
        " Merging ",
        highlight(left_name),
        " & ",
        highlight(right_name),
        " on ",
        highlight(left_on),
        " & ",
        highlight(right_on),
        "..."
      )
    }

    catsize(left, left_name)
    catsize(right, right_name)
  }

  if (how == "left") {
    all.x <- TRUE
    all.y <- FALSE
  } else if (how == "right") {
    all.x <- FALSE
    all.y <- TRUE
  } else if (how == "inner") {
    all.x <- FALSE
    all.y <- FALSE
  } else {
    all.x <- all.y <- TRUE
  }
  if (!is.null(left_suffix)) {
    left_names <- setdiff(names(left), left_on)
    setnames(left, left_names, paste0(left_names, left_suffix))
  }
  if (!is.null(right_suffix)) {
    right_names <- setdiff(names(right), right_on)
    setnames(right, right_names, paste0(right_names, right_suffix))
  }
  dat <- merge(
    left,
    right,
    by.x = left_on,
    by.y = right_on,
    all.x = all.x,
    all.y = all.y,
    ...
  )
  if (verbosity > 0L) {
    catsize(dat, "Merged")
  }
  dat
} # /rtemis::dt_merge


#' Clean factor levels of data.table ***in-place***
#'
#' Finds all factors in a data.table and cleans factor levels to include
#' only underscore symbols
#'
#' @param x data.table: Input data.table. Will be modified ***in-place***.
#' @param prefix_digits Character: If not NA, add this prefix to all factor levels that
#' are numbers
#'
#' @return Nothing, modifies `x` ***in-place***.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- as.data.table(iris)
#' levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris")
#' levels(x[["Species"]])
#' dt_set_cleanfactorlevels(x)
#' levels(x[["Species"]])
dt_set_cleanfactorlevels <- function(x, prefix_digits = NA) {
  stopifnot(inherits(x, "data.table"))
  idi <- names(x)[sapply(x, is.factor)]
  for (i in idi) {
    x[,
      (i) := factor(
        x[[i]],
        labels = clean_names(levels(x[[i]]), prefix_digits = prefix_digits)
      )
    ]
  }
} # /rtemis::dt_set_cleanfactorlevels


#' Get N and percent match of values between two columns of two data.tables
#'
#' @param x data.table: First input data.table.
#' @param y data.table: Second input data.table.
#' @param on Integer or character: column to read in `x` and `y`, if it is the
#' same
#' @param left_on Integer or character: column to read in `x`
#' @param right_on Integer or character: column to read in `y`
#' @param verbosity Integer: Verbosity level.
#'
#' @return list.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(ID = 1:5, Alpha = letters[1:5])
#' y <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6])
#' dt_pctmatch(x, y, on = "ID")
dt_pctmatch <- function(
  x,
  y,
  on = NULL,
  left_on = NULL,
  right_on = NULL,
  verbosity = 1L
) {
  if (is.null(left_on)) {
    left_on <- on
  }
  if (is.null(right_on)) {
    right_on <- on
  }
  xv <- unique(x[[left_on]])
  n <- length(xv)
  yv <- unique(y[[right_on]])
  nmatch <- sum(xv %in% yv)
  matchpct <- nmatch / n * 100
  if (verbosity > 0L) {
    by_final <- paste(unique(c(left_on, right_on)), collapse = ", ")
    msg0(
      "Matched ",
      highlight(nmatch),
      "/",
      highlight(n),
      " on ",
      bold(by_final),
      " (",
      highlight(ddSci(matchpct)),
      "%)"
    )
  }
  invisible(list(nmatch = nmatch, matchpct = matchpct))
} # /rtemis::dt_pctmatch


#' Get percent of missing values from every column
#'
#' @param x data.frame or data.table
#' @param verbosity Integer: Verbosity level.
#'
#' @return list
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(a = c(1, 2, NA, 4), b = c(NA, NA, 3, 4), c = c("A", "B", "C", NA))
#' dt_pctmissing(x)
dt_pctmissing <- function(x, verbosity = 1L) {
  nmissing <- sapply(x, \(i) sum(is.na(i)))
  pctmissing <- nmissing / NROW(x)
  if (verbosity > 0L) {
    cat("Percent missing per column:\n")
    printls(pctmissing, print_class = FALSE)
  }
  invisible(list(nmissing = nmissing, pctmissing = pctmissing))
} # /rtemis::dt_pctmissing


#' Convert data.table logical columns to factors
#'
#' Convert data.table logical columns to factors with custom labels ***in-place***
#'
#' @param x data.table: Input data.table. Will be modified ***in-place***.
#' @param cols Optional Integer or character: columns to convert. If NULL, operates on all
#' logical columns.
#' @param labels Character: labels for factor levels.
#' @param maintain_attributes Logical: If TRUE, maintain column attributes.
#' @param fillNA Optional Character: If not NULL, fill NA values with this constant.
#'
#' @return data.table, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(a = 1:5, b = c(TRUE, FALSE, FALSE, FALSE, TRUE))
#' x
#' dt_set_logical2factor(x)
#' x
#' z <- data.table(
#'   alpha = 1:5,
#'   beta = c(TRUE, FALSE, TRUE, NA, TRUE),
#'   gamma = c(FALSE, FALSE, TRUE, FALSE, NA)
#' )
#' # You can usee fillNA to fill NA values with a constant
#' dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"), fillNA = "No")
#' z
#' w <- data.table(mango = 1:5, banana = c(FALSE, FALSE, TRUE, TRUE, FALSE))
#' w
#' dt_set_logical2factor(w, cols = 2, labels = c("Ugh", "Huh"))
#' w
#' # Column attributes are maintained by default:
#' z <- data.table(
#'   alpha = 1:5,
#'   beta = c(TRUE, FALSE, TRUE, NA, TRUE),
#'   gamma = c(FALSE, FALSE, TRUE, FALSE, NA)
#' )
#' for (i in seq_along(z)) setattr(z[[i]], "source", "Guava")
#' str(z)
#' dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"))
#' str(z)
dt_set_logical2factor <- function(
  x,
  cols = NULL,
  labels = c("False", "True"),
  maintain_attributes = TRUE,
  fillNA = NULL
) {
  if (is.null(cols)) {
    cols <- names(x)[sapply(x, is.logical)]
  }
  for (i in cols) {
    if (maintain_attributes) {
      .attr <- attributes(x[[i]])
    }
    x[, (i) := factor(x[[i]], levels = c(FALSE, TRUE), labels = labels)]
    if (!is.null(fillNA)) {
      x[is.na(x[[i]]), (i) := fillNA]
    }
    if (maintain_attributes) {
      for (j in seq_along(.attr)) {
        setattr(x[[i]], names(.attr)[j], .attr[[j]])
      }
    }
  }
  invisible(x)
}


#' Inspect column types
#'
#' Will attempt to identify columns that should be numeric but are either character or
#' factor by running [inspect_type] on each column.
#'
#' @param x data.table: Input data.table.
#' @param cols Character vector: columns to inspect.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   id = 8001:8006,
#'   a = c("3", "5", "undefined", "21", "4", NA),
#'   b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
#'   c = c(1, 2, 3, 4, 5, 6)
#' )
#' dt_inspect_types(x)
dt_inspect_types <- function(x, cols = NULL, verbosity = 1L) {
  if (is.null(cols)) {
    char_factor_idi <- which(sapply(x, is.character) | sapply(x, is.factor))
    cols <- names(x[, .SD, .SDcols = char_factor_idi])
  }
  current_types <- sapply(x[, .SD, .SDcols = cols], class)
  suggested_types <- sapply(
    cols,
    \(cn) inspect_type(x[[cn]], xname = cn, verbosity = verbosity)
  )
  to_convert <- suggested_types != current_types
  names(to_convert)[to_convert]
}


#' Set column types automatically
#'
#' This function inspects a data.table and attempts to identify columns that should be
#' numeric but have been read in as character, and fixes their type ***in-place***.
#' This can happen when one or more fields contain non-numeric characters, for example.
#'
#' @param x data.table: Input data.table. Will be modified ***in-place***, if needed.
#' @param cols Character vector: columns to work on. If not defined, will work on all
#' columns
#' @param verbosity Integer: Verbosity level.
#'
#' @return data.table, invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   id = 8001:8006,
#'   a = c("3", "5", "undefined", "21", "4", NA),
#'   b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
#'   c = c(1, 2, 3, 4, 5, 6)
#' )
#' str(x)
#' # ***in-place*** operation means no assignment is needed
#' dt_set_autotypes(x)
#' str(x)
#'
#' # Try excluding column 'a' from autotyping
#' x <- data.table(
#'   id = 8001:8006,
#'   a = c("3", "5", "undefined", "21", "4", NA),
#'   b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
#'   c = c(1, 2, 3, 4, 5, 6)
#' )
#' str(x)
#' # exclude column 'a' from autotyping
#' dt_set_autotypes(x, cols = setdiff(names(x), "a"))
#' str(x)
dt_set_autotypes <- function(x, cols = NULL, verbosity = 1L) {
  if (is.null(cols)) {
    cols <- names(x)
  }
  character_idx <- sapply(x[, .SD, .SDcols = cols], is.character)
  char_cols <- names(character_idx)[character_idx]
  for (i in char_cols) {
    if (inspect_type(x[[i]], i, verbosity = 0L) == "numeric") {
      if (verbosity > 0L) {
        msg("Converting", highlight(i), "to", bold("numeric"))
      }
      # This will generate warnings if there are non-numeric values
      suppressWarnings({
        x[, (i) := as.numeric(x[[i]])]
      })
    }
  }
  invisible(x)
} # /rtemis::dt_set_autotypes


#' List column names by attribute
#'
#' @param x data.table: Input data.table.
#' @param attribute Character: name of attribute.
#' @param exact Logical: If TRUE, use exact matching.
#' @param sorted Logical: If TRUE, sort the output.
#'
#' @return Character vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- data.table(
#'   id = 1:5,
#'   sbp = rnorm(5, 120, 15),
#'   dbp = rnorm(5, 80, 10),
#'   paO2 = rnorm(5, 90, 10),
#'   paCO2 = rnorm(5, 40, 5)
#' )
#' setattr(x[["id"]], "source", "demographics")
#' setattr(x[["sbp"]], "source", "outpatient")
#' setattr(x[["dbp"]], "source", "outpatient")
#' setattr(x[["paO2"]], "source", "icu")
#' setattr(x[["paCO2"]], "source", "icu")
#'
#' dt_names_by_attr(x, "source", "outpatient")
dt_names_by_attr <- function(x, attribute, exact = TRUE, sorted = TRUE) {
  attrs <- unlist(lapply(x, \(i) attr(i, attribute)))
  attrs <- sapply(x, \(i) {
    .attr <- attr(i, attribute, exact = exact)
    if (is.null(.attr)) "NA" else .attr
  })
  vals <- unique(attrs)
  if (sorted) {
    sapply(vals, \(i) sort(names(x)[attrs == i]))
  } else {
    sapply(vals, \(i) names(x)[attrs == i])
  }
} # /rtemis::dt_names_by_attr


#' Clean column names and factor levels ***in-place***
#'
#' @param x data.table: Input data.table. Will be modified ***in-place***, if needed.
#' @param prefix_digits Character: prefix to add to names beginning with a
#' digit. Set to NA to skip
#'
#' @return Nothing, modifies `x` ***in-place***.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' x <- as.data.table(iris)
#' levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris")
#' names(x)
#' levels(x[["Species"]])
#' # ***in-place*** operation means no assignment is needed
#' dt_set_clean_all(x)
#' names(x)
#' levels(x[["Species"]])
dt_set_clean_all <- function(x, prefix_digits = NA) {
  if (!is.data.table(x)) {
    cli::cli_abort("{.arg x} must be a data.table")
  }
  data.table::setnames(x, names(x), clean_colnames(x))
  idi <- names(x)[sapply(x, is.factor)]
  for (i in idi) {
    x[,
      (i) := factor(
        x[[i]],
        labels = clean_names(levels(x[[i]]), prefix_digits = prefix_digits)
      )
    ]
  }
} # /rtemis::dt_set_clean_all


#' Describe data.table
#'
#' @param x data.table: Input data.table.
#' @param verbosity Integer: If > 0, print output to console.
#'
#' @return List with three data.tables: Numeric, Categorical, and Date.
#'
#' @author EDG
#' @export
#'
#' @examples
#' library(data.table)
#' origin <- as.POSIXct("2022-01-01 00:00:00", tz = "America/Los_Angeles")
#' x <- data.table(
#'   ID = paste0("ID", 1:10),
#'   V1 = rnorm(10),
#'   V2 = rnorm(10, 20, 3),
#'   V1_datetime = as.POSIXct(
#'     seq(
#'       1, 1e7,
#'       length.out = 10
#'     ),
#'     origin = origin
#'   ),
#'   V2_datetime = as.POSIXct(
#'     seq(
#'       1, 1e7,
#'       length.out = 10
#'     ),
#'     origin = origin
#'   ),
#'   C1 = sample(c("alpha", "beta", "gamma"), 10, TRUE),
#'   F1 = factor(sample(c("delta", "epsilon", "zeta"), 10, TRUE))
#' )
dt_describe <- function(x, verbosity = 1L) {
  if (!is.data.table(x)) {
    cli::cli_abort("{.arg x} must be a data.table")
  }
  nrows <- NROW(x)

  # appease R CMD check: do not use ..var in DT frame, use with = FALSE instead

  # Numeric
  index_nm <- which(sapply(x, is.numeric))

  nm_summary <- if (length(index_nm) > 0) {
    data.frame(
      Variable = x[, index_nm, with = FALSE] |> names(),
      Min = sapply(x[, index_nm, with = FALSE], min, na.rm = TRUE),
      Max = sapply(x[, index_nm, with = FALSE], max, na.rm = TRUE),
      Median = sapply(x[, index_nm, with = FALSE], median, na.rm = TRUE),
      Mean = sapply(x[, index_nm, with = FALSE], mean, na.rm = TRUE),
      SD = sapply(x[, index_nm, with = FALSE], sd, na.rm = TRUE),
      Pct_missing = sapply(
        x[, index_nm, with = FALSE],
        \(col) sum(is.na(col)) / nrows
      )
    )
  } else {
    data.frame(
      Variable = character(),
      Min = numeric(),
      Max = numeric(),
      Median = numeric(),
      Mean = numeric(),
      SD = numeric(),
      Pct_missing = numeric()
    )
  }

  # Characters & factors
  index_cf <- c(which(sapply(x, is.character)), which(sapply(x, is.factor)))

  cf_summary <- if (length(index_cf) > 0) {
    data.frame(
      Variable = x[, index_cf, with = FALSE] |> names(),
      N_unique = sapply(
        x[, index_cf, with = FALSE],
        \(col) length(unique(col))
      ),
      Mode = sapply(x[, index_cf, with = FALSE], get_mode),
      Counts = sapply(x[, index_cf, with = FALSE], describe),
      Pct_missing = sapply(
        x[, index_cf, with = FALSE],
        \(col) sum(is.na(col)) / nrows
      )
    )
  } else {
    data.frame(
      Variable = numeric(),
      N_unique = integer(),
      Mode = character(),
      Counts = character(),
      Pct_missing = numeric()
    )
  }

  # Dates
  index_dt <- which(sapply(
    x,
    \(col) any(class(col) %in% c("Date", "IDate", "POSIXct", "POSIXt"))
  ))

  dt_summary <- if (length(index_dt) > 0) {
    data.frame(
      Variable = x[, index_dt, with = FALSE] |> names(),
      Min = do.call(c, lapply(x[, index_dt, with = FALSE], min, na.rm = TRUE)),
      Max = do.call(c, lapply(x[, index_dt, with = FALSE], max, na.rm = TRUE)),
      Median = do.call(
        c,
        lapply(x[, index_dt, with = FALSE], median, na.rm = TRUE)
      ),
      Mean = do.call(
        c,
        lapply(x[, index_dt, with = FALSE], mean, na.rm = TRUE)
      ),
      Pct_missing = sapply(
        x[, index_dt, with = FALSE],
        \(col) sum(is.na(col)) / nrows
      )
    )
  } else {
    data.frame(
      Variable = character(),
      Min = numeric(),
      Max = numeric(),
      Median = numeric(),
      Mean = numeric(),
      Pct_missing = numeric()
    )
  }

  out <- list(
    Numeric = nm_summary,
    Categorical = cf_summary,
    Date = dt_summary
  )
  if (verbosity > 0L) {
    printls(out, print_df = TRUE)
  }
  invisible(out)
} # /rtemis::dt_describe


================================================
FILE: R/utils_date.R
================================================
# utils_date.R
# ::rtemis::
# 2024- EDG rtemis.org

#' Extract features from dates
#'
#' @details weekday and month will be extracted as factors, year as integer
#'
#' @param dates Date vector.
#' @param features Character vector: features to extract.
#' @param drop_dates Logical: If TRUE, drop original date column.
#'
#' @return data.table with extracted features
#'
#' @author EDG
#' @keywords internal
#' @noRd
dates2features <- function(
  dates,
  features = c("weekday", "month", "year"),
  drop_dates = TRUE
) {
  # appease R CMD check
  weekday <- NULL
  # to factors: dow, month
  dt <- data.table(dates = dates)
  if ("weekday" %in% features) {
    dt[, weekday := factor(weekdays(dates))]
  }
  if ("month" %in% features) {
    dt[, month := factor(months(dates))]
  }
  if ("year" %in% features) {
    dt[, year := year(dates)]
  }
  if (drop_dates) {
    dt[, dates := NULL]
  }
  dt
} # /rtemis::dates2features


#' Get holidays from date vector
#'
#' @param dates Date vector
#' @param holidays Character vector: holidays to extract
#'
#' @return Factor of length `length(dates)` with levels "Not Holiday", "Holiday"
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_holidays <- function(
  dates,
  holidays = c("LaborDay", "NewYearsDay", "ChristmasDay")
) {
  # Get years from dates
  years <- unique(data.table::year(dates))
  # Get all holidays in all years
  .holidays <- do.call(
    "c",
    lapply(years, function(year) {
      do.call(
        "c",
        lapply(holidays, function(holiday) {
          timeDate::as.Date.timeDate(timeDate::holiday(
            year = year,
            Holiday = holiday
          ))
        })
      )
    })
  )
  # Return intersection of dates and holidays
  holidays_fct <- factor(
    rep(0, length(dates)),
    levels = c(0, 1),
    labels = c("Not Holiday", "Holiday")
  )
  holidays_fct[dates %in% .holidays] <- "Holiday"
  holidays_fct
} # /rtemis::get_holidays


#' Date to factor time bin
#'
#' Convert Date to time bin factor.
#'
#' Order of levels will be chronological (important e.g. for plotting)
#' Additionally, can output ordered factor with `ordered = TRUE`
#'
#' @param x Date vector
#' @param time_bin Character: "year", "quarter", "month", or "day"
#' @param make_bins Character: "range" or "preseent". If "range" the factor levels will include all
#' time periods define by `time_bin` within `bin_range`. This means factor levels can be
#' empty. Otherwise, if "present", factor levels only include time periods present in data.
#' @param bin_range Date, vector, length 2: Range of dates to make levels for. Defaults to range of
#' input dates `x`.
#' @param ordered Logical: If TRUE, factor output is ordered.
#'
#' @return factor of time periods
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' library(data.table)
#' startDate <- as.Date("2018-01-01")
#' endDate <- as.Date("2020-12-31")
#' time <- sample(seq(startDate, endDate, length.out = 100))
#' date2factor(time)
#' date2factor(time, "quarter")
#' date2factor(time, "month")
#' date2factor(time, "day")
#' # range vs. present
#' x <- sample(seq(as.Date("2018-01-01"), as.Date("2021-01-01"), by = 1), 10)
#' date2factor(x, time_bin = "quarter", make_bins = "present")
#' date2factor(x, time_bin = "quarter", make_bins = "range")
date2factor <- function(
  x,
  time_bin = c("year", "quarter", "month", "day"),
  make_bins = c("range", "present"),
  bin_range = range(x, na.rm = TRUE),
  ordered = FALSE
) {
  time_bin <- match.arg(time_bin)
  make_bins <- match.arg(make_bins)

  if (time_bin == "year") {
    if (make_bins == "present") {
      factor(data.table::year(x), ordered = ordered)
    } else {
      out <- as.character(data.table::year(x))
      factor(
        out,
        levels = as.character(seq(
          data.table::year(bin_range[1]),
          data.table::year(bin_range[2])
        )),
        ordered = ordered
      )
    }
  } else if (time_bin == "quarter") {
    if (make_bins == "present") {
      factor(
        paste0(data.table::year(x), " Q", data.table::quarter(x)),
        ordered = ordered
      )
    } else {
      factor(
        paste0(data.table::year(x), " Q", data.table::quarter(x)),
        levels = levels(date2yq(seq(
          bin_range[1],
          bin_range[2],
          by = "quarter"
        ))),
        ordered = ordered
      )
    }
  } else if (time_bin == "month") {
    ym <- paste(substr(months(x), 1, 3), data.table::year(x))
    if (make_bins == "present") {
      .levels <- unique(ym[order(x)])
      factor(ym, levels = .levels, ordered = ordered)
    } else {
      factor(
        ym,
        levels = levels(date2ym(seq(bin_range[1], bin_range[2], by = "month"))),
        ordered = ordered
      )
    }
  } else if (time_bin == "day") {
    if (make_bins == "present") {
      factor(x, levels = as.character(unique(x[order(x)])), ordered = ordered)
    } else {
      factor(
        x,
        levels = as.character(seq(bin_range[1], bin_range[2], by = 1)),
        ordered = ordered
      )
    }
  }
} # /rtemis::date2factor


#' Date to year-quarter factor
#'
#' @param x Date vector
#' @param ordered Logical: If TRUE, return ordered factor.
#'
#' @author EDG
#' @keywords internal
#' @noRd
date2yq <- function(x, ordered = FALSE) {
  factor(
    paste0(data.table::year(x), " Q", data.table::quarter(x)),
    ordered = ordered
  )
} # /rtemis::date2yq


#' Date to year-month factor
#'
#' @param x Date vector
#' @param ordered Logical: If TRUE, return ordered factor.
#'
#' @author EDG
#' @keywords internal
#' @noRd
date2ym <- function(x, ordered = FALSE) {
  ym <- paste(substr(months(x), 1, 3), data.table::year(x))
  .levels <- unique(ym[order(x)])
  factor(ym, levels = .levels, ordered = ordered)
} # /rtemis::date2ym


================================================
FILE: R/utils_df.R
================================================
# dataops
# ::rtemis::
# 2021 EDG rtemis.org

#' Get names by string matching or class
#'
#' @details
#' For `getnames()` only:
#' `pattern`, `starts_with`, and `ends_with` are applied sequentially.
#' If more than one is provided, the result will be the intersection of all matches.
#'
#'
#' @param x object with `names()` method.
#' @param pattern Character: pattern to match anywhere in names of x.
#' @param starts_with Character: pattern to match in the beginning of names of x.
#' @param ends_with Character: pattern to match at the end of names of x.
#' @param ignore_case Logical: If TRUE, well, ignore case.
#'
#' @return Character vector of matched names.
#'
#' @author EDG
#' @export
#'
#' @examples
#' getnames(iris, starts_with = "Sepal")
#' getnames(iris, ends_with = "Width")
#' getfactornames(iris)
#' getnumericnames(iris)
getnames <- function(
  x,
  pattern = NULL,
  starts_with = NULL,
  ends_with = NULL,
  ignore_case = TRUE
) {
  .names <- if (is.character(x)) {
    x
  } else {
    names(x)
  }
  # Apply filters sequentially
  if (!is.null(pattern)) {
    .names <- .names[grep(pattern, .names, ignore.case = ignore_case)]
  }
  if (!is.null(starts_with)) {
    .names <- .names[
      grep(paste0("^", starts_with), .names, ignore.case = ignore_case)
    ]
  }
  if (!is.null(ends_with)) {
    .names <- .names[
      grep(paste0(ends_with, "$"), .names, ignore.case = ignore_case)
    ]
  }
  .names
} # /rtemis::getnames


#' Get names by string matching multiple patterns
#'
#' @details
#' `pattern`, `starts_with`, and `ends_with` are applied and the union of all matches is returned.
#' `pattern` can be a character vector of multiple patterns to match.
#'
#' @param x Character vector or object with `names()` method.
#' @param pattern Character vector: pattern(s) to match anywhere in names of x.
#' @param starts_with Character: pattern to match in the beginning of names of x.
#' @param ends_with Character: pattern to match at the end of names of x.
#' @param ignore_case Logical: If TRUE, well, ignore case.
#' @param return_index Logical: If TRUE, return integer index of matches instead of names.
#'
#' @return Character vector of matched names or integer index.
#'
#' @author EDG
#' @export
#'
#' @examples
#' mgetnames(iris, pattern = c("Sepal", "Petal"))
#' mgetnames(iris, starts_with = "Sepal")
#' mgetnames(iris, ends_with = "Width")
mgetnames <- function(
  x,
  pattern = NULL,
  starts_with = NULL,
  ends_with = NULL,
  ignore_case = TRUE,
  return_index = FALSE
) {
  .names <- if (is.character(x)) x else names(x)
  idi <- numeric()
  if (!is.null(pattern)) {
    idi <- c(
      idi,
      unlist(lapply(
        pattern,
        function(p) grep(p, .names, ignore.case = ignore_case)
      ))
    )
  }
  if (!is.null(starts_with)) {
    idi <- c(idi, which(startsWith(.names, starts_with)))
  }
  if (!is.null(ends_with)) {
    idi <- c(idi, which(endsWith(.names, ends_with)))
  }
  idi <- unique(idi)
  if (return_index) {
    idi
  } else {
    .names[idi]
  }
}

# Get factor/numeric/logical/character names from data.frame/data.table ----

# @param x data.frame or data.table (or data.frame-compatible object)
# @return Character vector of column names of x with the specified class.
#'
#' @rdname getnames
#' @export
getfactornames <- function(x) names(x)[sapply(x, is.factor)]

#' @rdname getnames
#' @export
getnumericnames <- function(x) names(x)[sapply(x, is.numeric)]

#' @rdname getnames
#' @export
getlogicalnames <- function(x) names(x)[sapply(x, is.logical)]

#' @rdname getnames
#' @export
getcharacternames <- function(x) names(x)[sapply(x, is.character)]

#' @rdname getnames
#' @export
getdatenames <- function(x) {
  date_id <- sapply(
    x,
    \(v) class(v)[1] %in% c("Date", "IDate", "POSIXct", "POSIXlt")
  )
  names(x)[date_id]
}

#' Get data.frame names and types
#'
#' @param x data.frame / data.table or similar
#' @return character vector of column names with attribute "type" holding the class of each
#' column
#'
#' @author EDG
#' @export
#'
#' @examples
#' getnamesandtypes(iris)
getnamesandtypes <- function(x) {
  xnames <- names(x)
  attr(xnames, "type") <- sapply(x, class)
  xnames
} # /rtemis::namesandtypes


#' Unique values per feature
#'
#' Get number of unique values per features
#'
#' @param x matrix or data frame input
#' @param excludeNA Logical: If TRUE, exclude NA values from unique count.
#'
#' @return Vector, integer of length `NCOL(x)` with number of unique
#' values per column/feature
#'
#' @author EDG
#' @export
#'
#' @examples
#' df_nunique_perfeat(iris)
df_nunique_perfeat <- function(x, excludeNA = FALSE) {
  if (excludeNA) {
    apply(x, 2, function(i) length(unique(na.exclude(i))))
  } else {
    apply(x, 2, function(i) length(unique(i)))
  }
} # /rtemis::df_nunique_perfeat


#' Move data frame column
#'
#' @param x data.frame.
#' @param colname Character: Name of column you want to move.
#' @param to Integer: Which column position to move the vector to.
#' Default = `ncol(x)` i.e. the last column.
#'
#' @return data.frame
#'
#' @author EDG
#' @export
#'
#' @examples
#' ir <- df_movecolumn(iris, colname = "Species", to = 1L)
df_movecolumn <- function(x, colname, to = ncol(x)) {
  if (!is.data.frame(x)) {
    cli::cli_abort("Input {.arg x} must be a data frame.")
  }

  check_character(colname, allow_null = FALSE)

  to <- clean_int(to)

  if (NCOL(x) < 2) {
    cli::cli_abort("Input data.frame {.arg x} must have at least 2 columns.")
  }

  if (!(colname %in% names(x))) {
    cli::cli_abort("Column {.val {colname}} not found in input data frame.")
  }

  ncols <- ncol(x)
  if (to < 1L || to > ncols) {
    cli::cli_abort("{.arg to} must be between 1 and {.val {ncols}}.")
  }

  xnames <- setdiff(names(x), colname)
  x[, append(xnames, colname, after = to - 1L)]
} # /rtemis::df_movecolumn


#' Vector to data.frame
#'
#' Convert vector to 1-row data.frame, maintaining names if present
#'
#' @param x Vector.
#' @param col_names Character: Name of the vector.
#'
#' @return data.frame.
#'
#' @author EDG
#' @keywords internal
#' @noRd
vec2df <- function(x, col_names = NULL) {
  if (!is.vector(x)) {
    cli::cli_abort("Input must be a vector")
  }
  if (!is.null(col_names)) {
    names(x) <- col_names
  }
  as.data.frame(t(x))
} # /rtemis::vec2df


================================================
FILE: R/utils_exec.R
================================================
# utils_exec.Ranger
# ::rtemis::
# 2025 EDG rtemis.org

#' Do call with tryCatch and suggestion
#'
#' @param fn Function to call.
#' @param args List of arguments to pass to function.
#' @param error_pattern_suggestion Named list of the form pattern = "suggestion". If the pattern is
#'  found in the error message, the suggestion is appended to the error message.
#' @param warning_pattern_suggestion Named list of the form pattern = "suggestion". If the pattern is
#'
#' @return Result of function call.
#'
#' @author EDG
#' @keywords internal
#' @noRd
do_call <- function(
  fn,
  args,
  error_pattern_suggestion = NULL,
  warning_pattern_suggestion = NULL
) {
  call <- parent.frame(n = 1L)
  common_errors <- list(
    "object '(.*)' not found" = "Check that the object exists and is spelled correctly.",
    "object of type 'closure' is not subsettable" = "Check that the object is a list or data.frame."
  )
  common_warnings <- list(
    "NAs introduced by coercion" = "Check that the input is of the correct type.",
    # "glm.fit: algorithm did not converge" =
    # "Same reasons as for 'glm.fit: fitted probabilities numerically 0 or 1 occurred'.",
    "glm.fit: fitted probabilities numerically 0 or 1 occurred" = paste(
      bold("Reasons for this warning include:"),
      "1) Perfect Separation of classes.",
      "2) Highly Imbalanced data.",
      "3) Extreme values in predictors.",
      "4) Too many predictors for the number of observations.",
      "5) Multicollinearity.",
      bold("Suggestion:"),
      "Try using GLMNET or tree-based algorithms",
      sep = "\n  "
    )
  )
  err_pat_sug <- c(common_errors, error_pattern_suggestion)
  warn_pat_sug <- c(common_warnings, warning_pattern_suggestion)
  tryCatch(
    {
      withCallingHandlers(
        {
          do.call(fn, args)
        },
        warning = function(w) {
          fnwarn <- conditionMessage(w)
          message("Warning caught: ", fnwarn)
          idi <- which(sapply(
            names(warn_pat_sug),
            function(i) grepl(i, fnwarn)
          ))
          if (length(idi) > 0) {
            for (i in idi) {
              cat(orange(warn_pat_sug[[i]], "\n"))
            }
          }
          invokeRestart("muffleWarning")
        } # /warning
      ) # /withCallingHandlers
    },
    error = function(e) {
      fnerr <- e[["message"]]
      errmsg <- paste0(highlight(fn), " failed with error:\n", fnerr, "\n")
      idi <- which(sapply(names(err_pat_sug), function(i) grepl(i, fnerr)))
      if (length(idi) > 0) {
        suggestions <- sapply(idi, function(i) err_pat_sug[[i]])
        errmsg <- paste0(
          red(errmsg),
          orange(
            paste0(
              bold("\nSuggestion:\n  "),
              paste0(suggestions, collapse = "\n  ")
            )
          )
        )
      }
      cat("\n")
      cli::cli_abort(errmsg, call = call)
    } # /error
  ) # /tryCatch
} # /rtemis::do_call


================================================
FILE: R/utils_files.R
================================================
# utils_files.R
# ::rtemis::
# 2025 EDG rtemis.org

#' Expand, normalize, concatenate, clean path
#'
#' @param ... Character: Parts of path to concatenate.
#' @param expand_path Logical: If TRUE, expand concatenated path using [path.expand].
#'
#' @return Character: Path.
#'
#' @author EDG
#' @keywords internal
#' @noRd
make_path <- function(..., expand_path = TRUE) {
  path <- list(...)
  # Remove final "/"
  path <- lapply(path, \(x) gsub("\\/$", "", x))
  # Concat
  path <- do.call(file.path, path)
  # Expand
  if (expand_path) {
    path <- path.expand(path)
  }
  path
} # /rtemis::make_path


================================================
FILE: R/utils_html.R
================================================
# html_ops.R
# ::rtemis::
# 2023- EDG rtemis.org

#' @keywords internal
#' @noRd
html_highlight <- function(..., bold = TRUE) {
  if (bold) {
    span(..., style = "color: #16A0AC; font-weight: 700;")
  } else {
    span(..., style = "color: #16A0AC;")
  }
}


#' @keywords internal
#' @noRd
html_orange <- function(..., bold = TRUE) {
  if (bold) {
    span(..., style = "color: #FA6E1E; font-weight: 700;")
  } else {
    span(..., style = "color: #FA6E1E;")
  }
}


#' @keywords internal
#' @noRd
html_red <- function(..., bold = TRUE) {
  if (bold) {
    span(..., style = "color: #E61048; font-weight: 700;")
  } else {
    span(..., style = "color: #E61048;")
  }
}


#' @keywords internal
#' @noRd
html_success <- function(..., bold = TRUE) {
  if (bold) {
    span(..., style = "color: #32A03E; font-weight: 700;")
  } else {
    span(..., style = "color: #32A03E;")
  }
}


================================================
FILE: R/utils_io.R
================================================
# utils_io.R
# ::rtemis::
# 2022 EDG rtemis.org

#' Write \pkg{rtemis} model to RDS file
#'
#' @param object `Supervised` object.
#' @param outdir Path to output directory.
#' @param file_prefix Character: Prefix for filename.
#' @param verbosity Integer: Verbosity level.
#'
#' @author EDG
#' @keywords internal
#' @noRd
rt_save <- function(
  object,
  outdir,
  file_prefix,
  print_load_info = TRUE,
  verbosity = 1L
) {
  # Message before expanding outdir to preserve privacy when using relative paths.
  if (verbosity > 0L) {
    start_time <- Sys.time()
    msg0(
      "Writing data to ",
      outdir,
      "...",
      caller = NA,
      newline = FALSE
    )
  }
  outdir <- sanitize_path(outdir, must_exist = FALSE, type = "any")
  if (!dir.exists(outdir)) {
    dir.create(outdir, recursive = TRUE, showWarnings = FALSE)
  }
  rds_path <- file.path(outdir, paste0(file_prefix, ".rds"))
  try(saveRDS(object, rds_path))
  if (verbosity > 0L) {
    elapsed <- Sys.time() - start_time
  }
  if (file.exists(rds_path)) {
    if (verbosity > 0L) {
      yay(format(elapsed, digits = 2), gray(" [rt_save]"), sep = "")
      if (print_load_info) {
        msg0(gray(
          paste0(
            "Reload with: ",
            "> obj <- readRDS('",
            rds_path,
            "')"
          )
        ))
      }
    }
  } else {
    if (verbosity > 0L) {
      nay(
        "Failed after ",
        format(elapsed, digits = 2),
        gray(" [rt_save]"),
        sep = ""
      )
    }
    cli::cli_abort("Error: Saving model to ", outdir, " failed.")
  }
} # /rtemis::rt_save

#' Check file(s) exist
#'
#' @param paths Character vector of paths
#' @param verbosity Integer: Verbosity level.
#' @param pad Integer: Number of spaces to pad to the left
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
check_files <- function(paths, verbosity = 1L, pad = 0) {
  if (verbosity > 0L) {
    msg0("Checking ", singorplu(length(paths), "file"), ":")
  }

  for (f in paths) {
    if (file.exists(f)) {
      if (verbosity > 0L) {
        yay(f, pad = pad)
      }
    } else {
      if (verbosity > 0L) {
        nay(paste(f, red(" not found!")), pad = pad)
      }
      cli::cli_abort("File not found")
    }
  }
} # /rtemis::check_files

# %% sanitize_path ----
#' Sanitize and validate file paths for security
#'
#' Validates and normalizes file paths to prevent security vulnerabilities
#' including command injection, path traversal, and unauthorized file access.
#'
#' @param path Character: File or directory path to sanitize.
#' @param must_exist Logical: If TRUE, abort if path does not exist. Default = FALSE.
#' @param allowed_base Character: Optional base directory to restrict paths to. If provided,
#' the normalized path must be within this directory. Default = NULL (no restriction).
#' @param allow_urls Logical: If TRUE, allow URL schemes (http://, https://, etc.).
#' Default = FALSE.
#' @param type Character: Expected path type - "file", "directory", or "any". Only checked
#' if `must_exist = TRUE`. Default = "any".
#'
#' @return Character: Sanitized and normalized absolute path.
#'
#' @details
#' Security checks performed:
#' - Rejects paths starting with pipe character (prevents command injection in R readers)
#' - Rejects paths containing null bytes
#' - Rejects URL schemes unless `allow_urls = TRUE`
#' - Normalizes path to absolute form
#' - Optionally validates path exists and is correct type
#' - Optionally validates path is within allowed base directory
#'
#' @author EDG
#' @keywords internal
#' @noRd
sanitize_path <- function(
  path,
  must_exist = FALSE,
  allowed_base = NULL,
  allow_urls = FALSE,
  type = c("any", "file", "directory")
) {
  type <- match.arg(type)

  # Check for NULL or empty
  if (is.null(path) || length(path) == 0L || nchar(path) == 0L) {
    cli::cli_abort("Path cannot be NULL or empty.")
  }

  # Check for multiple paths
  if (length(path) > 1L) {
    cli::cli_abort("Function accepts a single path. Got {length(path)} paths.")
  }

  # Check for null bytes (check if raw bytes contain 0x00)
  if (any(charToRaw(path) == 0L)) {
    cli::cli_abort("Path contains null byte: {.file {path}}")
  }

  # Check for pipe character at start (command injection vector)
  if (grepl("^\\s*\\|", path)) {
    cli::cli_abort("Path cannot start with pipe character: {.file {path}}")
  }

  # Check for URL schemes unless explicitly allowed
  if (!allow_urls && grepl("^[a-zA-Z][a-zA-Z0-9+.-]*://", path)) {
    cli::cli_abort("URL schemes not allowed: {.file {path}}")
  }

  # Normalize to absolute path
  # mustWork = FALSE allows non-existent paths, will check separately if needed
  normalized_path <- normalizePath(path, winslash = "/", mustWork = FALSE)

  # Validate against allowed base directory if specified
  if (!is.null(allowed_base)) {
    allowed_base_norm <- normalizePath(
      allowed_base,
      winslash = "/",
      mustWork = TRUE
    )
    # Check if normalized path starts with allowed base
    if (!startsWith(normalized_path, allowed_base_norm)) {
      cli::cli_abort(
        "Path {.file {path}} is outside allowed directory: {.file {allowed_base}}"
      )
    }
  }

  # Check existence and type if required
  if (must_exist) {
    if (!file.exists(normalized_path)) {
      cli::cli_abort("Path does not exist: {.file {normalized_path}}")
    }

    if (type == "file" && dir.exists(normalized_path)) {
      cli::cli_abort("Path is not a file: {.file {normalized_path}}")
    }

    if (type == "directory" && !dir.exists(normalized_path)) {
      cli::cli_abort("Path is not a directory: {.file {normalized_path}}")
    }
  }

  normalized_path
} # /rtemis::sanitize_path


================================================
FILE: R/utils_lightgbm.R
================================================
# utils_lightgbm.R
# ::rtemis::
# 2023- EDG rtemis.org

# %% prepare_lgb_data ----
#' Prepare data for LightGBM-based learners
#'
#' Shared data preparation for LightGBM, LightRF, and LightCART.
#' Converts factors to 0-based integers, removes the outcome from
#' `categorical_feature`, and creates `lgb.Dataset` objects.
#'
#' @param x tabular data: Training set (features + outcome in last column).
#' @param dat_validation Optional tabular data: Validation set.
#' @param type Character: "Classification" or "Regression".
#' @param weights Optional numeric vector: Case weights for training data.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Named list with elements:
#' - `train_data`: `lgb.Dataset` for training.
#' - `valid_data`: `lgb.Dataset` for validation, or NULL.
#' - `preprocessor`: `Preprocessor` object if factors were converted, or NULL.
#'
#' @author EDG
#' @keywords internal
#' @noRd
prepare_lgb_data <- function(
  x,
  dat_validation = NULL,
  type,
  weights = NULL,
  verbosity = 1L
) {
  # Factor-to-integer preprocessing ----
  factor_index <- names(x)[which(sapply(x, is.factor))]
  if (length(factor_index) > 0L) {
    prp <- preprocess(
      x,
      config = setup_Preprocessor(
        factor2integer = TRUE,
        factor2integer_startat0 = TRUE
      ),
      dat_validation = dat_validation,
      verbosity = verbosity
    )
    if (is.null(dat_validation)) {
      x <- prp@preprocessed
    } else {
      x <- prp@preprocessed[["training"]]
      dat_validation <- prp@preprocessed[["validation"]]
    }
  } else {
    prp <- NULL
  }

  # Remove outcome from factor_index (outcome is last column).
  # For Classification, the outcome is a factor that was also converted;
  # it must not be listed as a categorical feature.
  if (type == "Classification" && length(factor_index) > 0L) {
    factor_index <- factor_index[seq_len(length(factor_index) - 1L)]
  }

  # Create lgb.Datasets ----
  train_data <- lightgbm::lgb.Dataset(
    data = as.matrix(features(x)),
    categorical_feature = factor_index,
    label = outcome(x),
    weight = weights
  )

  valid_data <- if (!is.null(dat_validation)) {
    lightgbm::lgb.Dataset(
      data = as.matrix(features(dat_validation)),
      categorical_feature = factor_index,
      label = outcome(dat_validation)
    )
  }

  list(
    train_data = train_data,
    valid_data = valid_data,
    preprocessor = prp
  )
} # /rtemis::prepare_lgb_data

#' Get LightGBM Booster Trees
#'
#' @return A list of trees
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_lgb_tree <- function(x, n_iter = -1) {
  out <- lapply(
    jsonlite::fromJSON(
      lightgbm::lgb.dump(
        booster = x,
        num_iteration = n_iter
      ),
      simplifyVector = FALSE
    )[["tree_info"]],
    \(y) y[["tree_structure"]]
  )
  names(out) <- paste0("Tree_", seq_along(out))
  out
} # /rtemis::get_lgb_tree


# preorderlgb ----

#' Preorder Traversal of LightGBM Tree
#'
#' Called by `lgbtree2rules` and operates on `tree` environment in place.
#'
#' @param tree Environment that will hold the extracted rules.
#' @param node LightGBM tree.
#' @param rule Character: current rule.
#' @param left Character: left child label.
#' @param right Character: right child label.
#' @param split_feature Character: split feature label.
#' @param threshold Character: threshold label.
#' @param right_cat_type Character: "in" or "notin": operator for right categorical.
#' @param xnames Character vector: feature names.
#' @param factor_levels Named list of factor levels.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character vector of rules.
#'
#' @author EDG
#' @keywords internal
#' @noRd
preorderlgb <- function(
  tree,
  node,
  rule = "TRUE",
  left = "left_child",
  right = "right_child",
  split_feature = "split_feature",
  threshold = "threshold",
  right_cat_type = "in",
  xnames,
  factor_levels,
  verbosity = 0L
) {
  if (is.null(node[[split_feature]])) {
    names(rule) <- "leaf"
    if (verbosity > 0L) {
      message("Reached a leaf; rule is ", rule, ".")
    }
    tree[["leafs"]] <- c(tree[["leafs"]], rule)
    return(rule)
  }
  rule_left <- paste0(
    rule,
    " & ",
    xnames[node[[split_feature]] + 1],
    decision_left(node[["decision_type"]]),
    fmt_thresh(
      catsplit = node[["decision_type"]] == "==",
      feature = xnames[node[[split_feature]] + 1],
      threshold = node[["threshold"]],
      factor_levels = factor_levels
    )
  )
  rule_right <- paste0(
    rule,
    " & ",
    xnames[node[[split_feature]] + 1],
    decision_right(node[["decision_type"]], right_cat_type),
    fmt_thresh_right(
      catsplit = node[["decision_type"]] == "==",
      feature = xnames[node[[split_feature]] + 1],
      threshold = node[["threshold"]],
      factor_levels = factor_levels,
      cat_type = right_cat_type
    )
  )
  rule_left <- preorderlgb(
    tree,
    node[[left]],
    rule_left,
    left,
    right,
    split_feature,
    threshold,
    right_cat_type = right_cat_type,
    xnames = xnames,
    factor_levels = factor_levels,
    verbosity = verbosity
  )
  rule <- c(rule, rule_left)
  rule_right <- preorderlgb(
    tree,
    node[[right]],
    rule_right,
    left,
    right,
    split_feature,
    threshold,
    right_cat_type = right_cat_type,
    xnames = xnames,
    factor_levels = factor_levels,
    verbosity = verbosity
  )
  rule <- c(rule, rule_right)
} # /rtemis::preorderlgb


# lgbtree2rules ----
lgbtree2rules <- function(x, xnames, factor_levels, right_cat_type = "in") {
  tree <- new.env()
  tree[["leafs"]] <- character()
  preorderlgb(
    tree,
    x,
    xnames = xnames,
    right_cat_type = right_cat_type,
    factor_levels = factor_levels
  )
  # remove root node "TRUE & "
  substr(tree[["leafs"]], 8, 99999)
} # /rtemis::lgbtree2rules


# lgb2rules ----
#' Convert LightGBM Booster to set of rules
#'
#' @param x LightGBM Booster object
#' @param n_iter Integer: Number of trees to convert to rules
#' @param xnames Character vector: Names of features
#'
#' @return Character vector of rules
#'
#' @author EDG
#' @keywords internal
#' @noRd
lgb2rules <- function(
  Booster,
  n_iter = NULL,
  xnames,
  factor_levels,
  right_cat_type = "in",
  return_unique = TRUE,
  verbosity = 1L
) {
  if (verbosity > 0L) {
    msgstart("Extracting LightGBM rules...")
  }
  if (is.null(n_iter)) {
    n_iter <- length(Booster)
  }
  trees <- get_lgb_tree(Booster, n_iter)
  rules <- lapply(trees, function(x) {
    lgbtree2rules(
      x,
      xnames,
      factor_levels = factor_levels,
      right_cat_type = right_cat_type
    )
  }) |>
    unlist()
  if (verbosity > 0L) {
    msgdone()
  }
  if (return_unique) unique(rules) else rules
} # /rtemis::lgb2rules


# extract_rules.lgb.Booster ----
#' author EDG
#' @keywords internal
#' @noRd
method(extract_rules, class_lgb.Booster) <- function(
  x,
  n_iter = NULL,
  xnames,
  factor_levels,
  right_cat_type = "in",
  return_unique = TRUE,
  verbosity = 1L
) {
  if (verbosity > 0L) {
    msgstart("Extracting LightGBM rules...")
  }
  if (is.null(n_iter)) {
    n_iter <- length(x)
  }
  trees <- get_lgb_tree(x, n_iter)
  rules <- lapply(trees, function(x) {
    lgbtree2rules(
      x,
      xnames,
      factor_levels = factor_levels,
      right_cat_type = right_cat_type
    )
  }) |>
    unlist()

  rules <- if (return_unique) {
    unique(rules)
  } else {
    rules
  }
  if (verbosity > 0L) {
    msgdone()
    msg0(
      "Extracted ",
      highlight(length(rules)),
      ifelse(return_unique, " unique", ""),
      " rules."
    )
  }
  rules
} # /rtemis::extract_rules.lgb.Booster


# decision_left ----
#' @author EDG
#' @keywords internal
#' @noRd
decision_left <- function(decision_type) {
  switch(decision_type, "<=" = " <= ", "==" = " %in% ")
} # /rtemis::decision_left


#' @author EDG
#' @keywords internal
#' @noRd
decision_right <- function(decision_type, cat_type) {
  switch(
    decision_type,
    "<=" = " > ",
    "==" = if (cat_type == "in") " %in% " else " %notin% "
  )
} # /rtemis::decision_right


#' Format rule thresholds
#'
#' @param catsplit Logical: If TRUE, feature is categorical
#' @param feature Character: feature name
#' @param threshold Character: threshold as reported by lightgbm
#' @param factor_levels Named list of factor levels. Names should correspond to training
#' set column names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
fmt_thresh <- function(catsplit, feature, threshold, factor_levels) {
  if (catsplit) {
    flevels <- as.integer(strsplit(threshold, "\\|\\|")[[1]]) + 1 # 0- to 1-based factor level index
    flevels <- factor_levels[[feature]][flevels]
    paste0(
      "c(",
      paste0("'", flevels, "'", collapse = ","),
      ")"
    )
  } else {
    threshold
  }
} # /rtemis::fmt_thresh


#' @author EDG
#' @keywords internal
#' @noRd
fmt_thresh_right <- function(
  catsplit,
  feature,
  threshold,
  factor_levels,
  cat_type
) {
  if (catsplit) {
    flevels <- as.integer(strsplit(threshold, "\\|\\|")[[1]]) + 1 # 0- to 1-based factor level index
    flevels <- factor_levels[[feature]][flevels]
    if (cat_type == "in") {
      flevels <- setdiff(factor_levels[[feature]], flevels)
    }
    paste0(
      "c(",
      paste0("'", flevels, "'", collapse = ","),
      ")"
    )
  } else {
    threshold
  }
} # /rtemis::fmt_thresh_right


================================================
FILE: R/utils_palettes.R
================================================
# palettes.R
# ::rtemis::
# 2016- EDG rtemis.org

# Colors ----
#' @keywords internal
#' @noRd
ucsfCol <- c(
  Navy = "#052049",
  A2 = "#0F388A",
  A3_CTA_Blue = "#006BE9",
  B3_Blue = "#178CCB",
  B5 = "#B8E6FA",
  B6 = "#E2F4FC",
  C1 = "#0E5258",
  C2 = "#14828C",
  C3_Teal = "#16A0AC",
  C4 = "#60D0DA",
  C5 = "#B4E2E8",
  D1 = "#00483A",
  D2 = "#007242",
  D3_Green = "#32A03E",
  E3_Chartreuse = "#84C234",
  E4_Point_Reyes = "#B4DC55",
  F1 = "#2E2872",
  F2 = "#443E8C",
  F3_Purple = "#6C62D0",
  F4_Yosemite = "#8A8CE3",
  F5 = "#C0C0EA",
  G1 = "#461850",
  G2 = "#6C247C",
  G3_Violet = "#A238BA",
  G4 = "#C45ED8",
  G5 = "#EACCF0",
  H1 = "#561038",
  H2 = "#821A56",
  H3_Magenta = "#C42882",
  H4 = "#E266AE",
  H5 = "#F2C2DE",
  I3_Blue_Gray = "#506380",
  I6 = "#F2F3F4",
  J2 = "#878D96",
  J3_Cool_Gray = "#B4B9BF",
  J5 = "#E1E3E5",
  K3_Gray = "#D1D3D3",
  L3_Yellow = "#FEB80A",
  M3_Orange = "#FA6E1E",
  N3_Red = "#E61048"
)


# UC Davis ----
#' UC Davis Colors
#'
#' `ucdCol`: UC Davis color palette
#' (https://marketingtoolbox.ucdavis.edu/visual-identity/color.html)
#' @keywords internal
#' @noRd
ucdCol <- c(
  davisBlue = "#002855",
  davisGold = "#DAAA00",
  unitransRed = "#BA0C2F",
  westernRedbud = "#C6007E",
  californiaPoppy = "#ED8B00",
  goldenLupine = "#FFCD00",
  sunnyGrass = "#78BE20",
  skyBlue = "#00B5E2",
  recPoolBlue = "#008EAA",
  wineGrape = "#642667",
  muBrick = "#C26E60",
  hartHallStucco = "#E6A65D",
  sageGreen = "#9CAF88",
  evergreen = "#00573F",
  winterSkyGray = "#5B7F95",
  centennialWalkGray = "#B1B3B3",
  corkOak = "#ACA39A",
  southHallShingleBrown = "#4F2C1D"
)


# Berkeley ----
#' Berkeley Colors
#'
#' `berkeleyCol`: Berkeley color palette
#' (https://brand.berkeley.edu/colors/)
#'
#' @keywords internal
#' @noRd
berkeleyCol <- c(
  Berkeley_Blue = "#003262",
  Founders_Rock = "#3B7EA1",
  California_Gold = "#FDB515",
  Medalist = "#C4820E",
  Wellman_Tile = "#D9661F",
  Rose_Garden = "#EE1F60",
  Golden_Gate = "#ED4E33",
  South_Hall = "#6C3302",
  Bay_Fog = "#DDD5C7",
  Lawrence = "#00B0DA",
  LapLane = "#00A598",
  Pacific = "#46535E",
  Sather_Gate = "#B9D3B6",
  Ion = "#CFDD45",
  Soy_Bean = "#859438",
  Stone_Pine = "#584F29",
  Grey = "#EEEEEE",
  Web_Grey = "#888888"
)


# UC Santa Cruz ----
#' UC Santa Cruz Colors
#'
#' `ucscCol`: UC Santa Cruz color palette
#' (https://communications.ucsc.edu/visual-design/color/)
#'
#' @keywords internal
#' @noRd
ucscCol <- c(
  blue = "#003c6c",
  gold = "#fdc700",
  mediumBlue = "#006aad",
  lightBlue = "#13a5dc",
  teal = "#007988",
  orange = "#f29813",
  yellow = "#ffbf00",
  green = "#93c02d",
  rubineRed = "#da216d"
)


# UC Merced ----
#' UC Merced Colors
#'
#' `ucmercedCol`: UC Merced color palette
#' (https://publicrelations.ucmerced.edu/color-guidelines)
#'
#' @keywords internal
#' @noRd
ucmercedCol <- c(
  mercedRiverBlue = "#092f44",
  foothillsGold = "#a29061",
  sierraSkyBlue = "#5f8498",
  wildflowerBabyBlueEyes = "#2980b9",
  yosemiteSnowWhite = "#F8F5EC",
  halfDomeSlate = "#5B5B5B",
  mercedRyeGreen = "#235B16"
)


# UC Santa Barbara ----
#' UC Santa Barbara Colors
#'
#' `ucsbCol`: UC Santa Barbara color palette
#' (https://www.ucsb.edu/visual-identity/color)
#'
#' @keywords internal
#' @noRd
ucsbCol <- c(
  navy = "#003660",
  gold = "#FEBC11",
  aqua = "#04859B",
  moss = "#7A8D39",
  seaGreen = "#0BA89A",
  coral = "#EF5645",
  mist = "#9CBEBE",
  clay = "#DCD6CC",
  sandstone = "#C9BF9D",
  lightGray = "#DCE1E5"
)


# UCLA ----
#' UCLA Colors
#'
#' `uclaCol`: UCLA color palette (http://brand.ucla.edu/identity/colors)
#'
#' @keywords internal
#' @noRd
uclaCol <- c(
  Blue = "#2774AE",
  Gold = "#FFD100",
  Darkest_Blue = "#003B5C",
  Darker_Blue = "#005587",
  Lighter_Blue = "#8BB8E8",
  Lightest_Blue = "#C3D7EE",
  Darkest_Gold = "#FFB81C",
  Darker_Gold = "#FFC72C",
  Yellow = "#FFFF00",
  Green = "#00FF87",
  Magenta = "#FF00A5",
  Cyan = "#00FFFF",
  Purple = "#8237FF"
)


# UC Riverside ----
#' UC Riverside Colors
#'
#' `ucrCol`: UC Riverside color palette (https://brand.ucr.edu/ucr-colors)
#'
#' @keywords internal
#' @noRd
ucrColor <- c(
  ucrBlue = "#2d6cc0",
  ucrGold = "#f1ab00",
  ucrGray = "#393b41"
)


# UCI ----
#' UCI Colors
#'
#' `uciCol`: UCI color palette (https://communications.uci.edu/campus-resources/graphic-standards/colors.php)
#'
#' @keywords internal
#' @noRd
uciCol <- c(
  blue = "#0064a4",
  yellow = "#ffd200",
  teal = "#6aa2b8",
  lightGray = "#c6beb5",
  navy = "#1b3d6d",
  orange = "#f78d2d",
  darkGray = "#555759",
  lightYellow = "#f7eb5f"
)


# UC San Diego ----
#' UC San Diego Colors
#'
#' `ucsdCol`: UC San Diego color palette
#' (https://ucpa.ucsd.edu/brand/elements/color-palette/)
#'
#' @keywords internal
#' @noRd
ucsdCol <- c(
  blue = "#182B49",
  mediumBlue = "#006A96",
  gold = "#C69214",
  yellow = "#FFCD00",
  cyan = "#00C6D7",
  green = "#6E963B",
  lightYellow = "#F3E500",
  orange = "#FC8900",
  coolGray = "#747678",
  lightGray = "#B6B1A9",
  darkGold = "#84754E"
)


# University of California ----
#' University of California Colors
#'
#' `californiaCol`: University of California color palette
#' (http://brand.universityofcalifornia.edu/guidelines/color.html#!primary-colors)
#'
#' @keywords internal
#' @noRd
ucCol <- c(
  ucBlue = "#1295D8",
  ucGold = "#FFB511",
  blue = "#005581",
  lightBlue = "#72CDF4",
  gold = "#FFD200",
  lightgold = "#FFE552",
  orange = "#FF6E1B",
  lightOrange = "#FF8F28",
  pink = "#E44C9A",
  lightPink = "#FEB2E0",
  teal = "#00778B",
  lightTeal = "#00A3AD",
  ucGray = "#7C7E7F",
  warmGray8 = "#8F8884",
  warmGray3 = "#BEB6AF",
  warmGray1 = "#DBD5CD",
  metallicGold = "#B4975A"
)

# Stanford ----
#' Stanford Colors
#'
#' `stanfordCol`: Stanford color palette
#' (https://identity.stanford.edu/color.html#digital-color)
#'
#' @keywords internal
#' @noRd
stanfordCol <- c(
  Cardinal = "#8c1515",
  Cool_Grey = "#4d4f53",
  Black = "#2e2d29",
  Bright_Red = "#B1040E",
  Chocolate = "#2F2424",
  Stone = "#544948",
  Fog = "#F4F4F4",
  Light_Sandstone = "#F9F6EF",
  Sandstone = "#d2c295",
  Warm_Grey = "#3f3c30",
  Beige = "#9d9573",
  Light_Sage = "#c7d1c5",
  Clay = "#5f574f",
  Cloud = "#dad7cb",
  Driftwood = "#b6b1a9",
  Sandhill = "#b3995d",
  Palo_Alto = "#175e54",
  Teal = "#00505c",
  Purple = "#53284f",
  Redwood = "#8d3c1e",
  Brown = "#5e3032",
  Sky = "#0098db",
  Lagunita = "#007c92",
  Mint = "#009b76",
  Gold = "#b26f16",
  Sun = "#eaab00",
  Poppy = "#e98300"
)

# California State University ----
#' California State University Colors
#'
#' `csuCol`: California State University color palette
#' (https://www2.calstate.edu/csu-system/csu-branding-standards/Documents/CSU-Brand-Guidelines-8-2018.pdf)
#'
#' @keywords internal
#' @noRd
csuCol <- c(
  red = "#CC0B2A",
  coolGray = "#D9D9D6",
  black = "#2F2F2F"
)


# California Polytechnic State University ----
#' California Polytechnic State University Colors
#'
#' `calpolyCol`: Cal Poly color palette
#' (https://universitymarketing.calpoly.edu/brand-guidelines/colors/)
#'
#' @keywords internal
#' @noRd
calpolyCol <- c(
  calpolygreen = "#154734",
  calpolygold = "#C69214",
  stadiumgold = "#F8E08E",
  polycanyon = "#F2C75C",
  dextergreen = "#A4D65E",
  farmersmarket = "#3A913F",
  skyblue = "#B5E3D8",
  surfblue = "#5CB8B2",
  serenity = "#D3E3F4",
  morroblue = "#ABCAE9",
  missionbeige = "#E4E1D1",
  pismosand = "#CAC7A7",
  coastsage = "#B6CCC2",
  sycamore = "#789F90",
  kennedygray = "#8E9089",
  sealgray = "#54585A",
  heritageorange = "#FF6A39",
  avodaco = "#D0DF00"
)

# Caltech ----
#' Caltech Colors
#'
#' `caltechCol`: Caltech color palette (http://identity.caltech.edu/colors)
#'
#' @keywords internal
#' @noRd
caltechCol <- c(
  orange = "#FF6C0C",
  coolGray9 = "#76777B",
  coolGray3c = "#C8C8C8",
  pms414 = "#AAA99F",
  pms5497c = "#849895",
  pms7494c = "#9DAE88",
  pms451c = "#C7B784",
  pms7403c = "#F1D384",
  pms548c = "#003B4C",
  pms3292c = "#005851",
  pms668c = "#644B78",
  pms195c = "#7A303F",
  pms186c = "#CF0A2C",
  pms299c = "#00A1DF",
  pms7473c = "#1E988A",
  pms7489c = "#73A950",
  pms7408c = "#F9BE00",
  pms605c = "#E2CC00",
  pms1915c = "#F54D80"
)

# Scripps Research ----
#' Scripps Research Colors
#'
#' `scrippsCol`: Scripps Research color palette
#'
#' @keywords internal
#' @noRd
scrippsCol <- c(
  yellow = "#edb035",
  orange = "#f1624f",
  maroon = "#610f37",
  blue = "#273d78",
  teal = "#116f79",
  lightblue = "#59c3d3"
)

# Penn ----
#' rtemis Color Palettes
#'
#' `pennCol`: Penn color palette
#' (http://www.upenn.edu/about/styleguide-color-type)
#'
#' @keywords internal
#' @noRd
pennCol <- c(
  darkestBlue = "#000f3a",
  darkerBlue = "#00144d",
  blue = "#01256e",
  lighterBlue = "#045ea7",
  lightestBlue = "#82afd3",
  darkestRed = "#57000a",
  darkerRed = "#74000e",
  red = "#95001a",
  lighterRed = "#c2004d",
  lightestRed = "#e180a6",
  darkestYellow = "#af7f00",
  darkerYellow = "#eaa900",
  yellow = "#f2c100",
  lighterYellow = "#f8de00",
  lightestYellow = "#fcef80",
  darkestGreen = "#005200",
  darkerGreen = "#006e00",
  green = "#008e00",
  lighterGreen = "#00be00",
  lightestGreen = "#80df80",
  darkestOrange = "#812d00",
  darkerOrange = "#ac3c00",
  orange = "#c35a00",
  lighterOrange = "#df9700",
  lightestOrange = "#efcb80",
  darkestPurple = "#23001f",
  darkerPurple = "#2f0029",
  purple = "#4a0042",
  lighterPurple = "#890082",
  lightestPurple = "#c480c1"
)


#' `pennPalette`: Subset of `pennCol`.
#'
#' @keywords internal
#' @noRd
pennPalette <- pennCol[c(
  "lighterBlue",
  "red",
  "green",
  "yellow",
  "lighterPurple",
  "orange",
  "lightestBlue",
  "lighterRed",
  "lighterGreen",
  "lightestPurple",
  "lighterOrange"
)]

#' `pennLightPalette`: Subset of `pennCol`. This is the lighter Penn palette for use with
#' the dark themes
#'
#' @keywords internal
#' @noRd
pennLightPalette <- pennCol[c(
  "lightestBlue",
  "lightestRed",
  "lightestGreen",
  "lightestYellow",
  "lightestPurple"
)]


# CMU ----
#' CMU Colors
#'
#' `cmuCol`: Carnegie Mellon color palette
#' (https://www.cmu.edu/marcom/brand-standards/web-standards.html#colors)
#'
#' @keywords internal
#' @noRd
cmuCol <- c(
  cmuRed = "#bb0000",
  gray = "#e0e0e0",
  darkGray = "#666666",
  gold = "#aa6600",
  teal = "#006677",
  blue = "#224477",
  green = "#008855",
  darkGreen = "#224433"
)


# MIT ----
#' MIT Colors
#'
#' `mitCol`: MIT color palette
#' (http://web.mit.edu/graphicidentity/colors.html)
#'
#' @keywords internal
#' @noRd
mitCol <- c(
  red = "#A31F34",
  gray = "#8A8B8C",
  lightGray = "#C2C0BF"
)


# Princeton ----
#' Princeton Colors
#'
#' `princetonCol`: Princeton color palette
#' (https://communications.princeton.edu/guides-tools/logo-graphic-identity)
#'
#' @keywords internal
#' @noRd
princetonCol <- c(
  orangeOnWhite = "#e77500",
  orangeOnBlack = "#f58025"
)


# Columbia ----
#' Columbia Colors
#'
#' `columbiaCol`: Columbia color palette
#' (https://visualidentity.columbia.edu/content/web-0)
#'
#' @keywords internal
#' @noRd
columbiaCol <- c(
  blue = "#000d74",
  blue1 = "#C4D8E2",
  blue2 = "#75AADB",
  blue3 = "#6CADDF",
  blue4 = "#008EE0",
  blue5 = "#2C6BAC",
  blue6 = "#0046A6",
  white = "#F9F9F9",
  lightGray = "#EFEFEF",
  sandstone = "#D2D2C0",
  gray = "#555555",
  slate = "#41516C",
  tarawera = "#093552",
  yellow = "#FFB400",
  lightGreen = "#C0CD3F",
  lime = "#90C134",
  orange = "#C14D00",
  red = "#841C1C",
  purple = "#8E0F56"
)


# Brown ----
#' Brown Colors
#'
#' `brownCol`: Brown color palette
#' (https://www.brown.edu/university-identity/sites/university-identity/files/Brown_Visual_Identity_Policy_2016-07-22.pdf)
#'
#' @keywords internal
#' @noRd
brownCol <- c(
  red = "#ED1C24",
  brown = "#4E3629",
  gold = "#FFC72C",
  gray = "#98A4AE",
  skyBlue = "#59CBE8",
  emerald = "#00B398",
  navy = "#003C71",
  taupe = "#B7B09C"
)


# Yale ----
#' Yale Colors
#'
#' `yaleCol`: Yale color palette (https://yaleidentity.yale.edu/web)
#'
#' @keywords internal
#' @noRd
yaleCol <- c(
  yaleBlue = "#00356b",
  mediumBlue = "#286dc0",
  lightBlue = "#63aaff",
  darkestGray = "#222222",
  darkGray = "#4a4a4a",
  sandstone = "#978d85",
  lightGray = "#dddddd",
  lightestGray = "#f9f9f9",
  green = "#5f712d",
  orange = "#bd5319"
)


# Cornell ----
#' Cornell Colors
#'
#' `cornellCol`: Yale color palette
#' (https://brand.cornell.edu/design-center/colors/
#'
#' @keywords internal
#' @noRd
cornellCol <- c(
  carnellian = "#B31B1B",
  darkGrey = "#222222",
  lightGrey = "#F7F7F7",
  linkBlue = "#006699",
  greenGraphics = "#6EB43F",
  greenText = "#4B7B2B",
  greenLargeText = "#578E32",
  orangeGraphics = "#F8981D",
  orangeLargeText = "#D47500",
  redGraphics = "#EF4035",
  redText = "#DF1E12",
  navy = "#073949",
  darkWarmGrey = "#A2998B",
  seaGrey = "#9FAD9F"
)

# Harvard Medical School ----
#' HMS Colors
#'
#' `hmsCol`: Harvard Medical School color palette
#' (https://identityguide.hms.harvard.edu/color)
#'
#' @keywords internal
#' @noRd
hmsCol <- c(
  crimson = "#A51C30",
  black = "#1E1E1E",
  mortar = "#8C8179",
  parchment = "#F3F3F1",
  slate = "#8996A0",
  shade = "#BAC5C6",
  indigo = "#293352",
  blueBonnet = "#4E84C4",
  ivy = "#52854C",
  pear = "#C3D7A4",
  lemon = "#FFDB6D",
  saffron = "#D16103",
  gold = "#C4961A",
  creme = "#F4EDCA"
)


# Dartmouth ----
#' Dartmouth Colors
#'
#' `dartmouthCol`: Dartmouth color palette
#' (https://communications.dartmouth.edu/visual-identity/design-elements/color-palette#web%20palette)
#'
#' @keywords internal
#' @noRd
dartmouthCol <- c(
  dartmouthGreen = "#00693e",
  forestGreen = "#12312b",
  webGray1 = "#f7f7f7",
  webGray2 = "#e2e2e2",
  graniteGray = "#424141",
  autumnBrown = "#643c20",
  bonfireRed = "#9d162e",
  tuckOrange = "#e32d1c",
  summerYellow = "#f5dc69",
  springGreen = "#c4dd88",
  riverNavy = "#003c73",
  riverBlue = "#267aba",
  webViolet = "#8a6996",
  bonfireOrange = "#ffa00f"
)

# USF ----
#' USF Colors
#'
#' `usfCol`: USF color palette
#' (https://myusf.usfca.edu/marketing-communications/resources/graphics-resources/brand-standards/color-palette)
#' Color conversions performed using https://www.pantone.com/color-finder/
#'
#' @keywords internal
#' @noRd
usfCol <- c(
  green = "#205C40",
  yellow = "#ffb81c",
  gray = "#75787B"
)


# Washington ----
#' University of Washington Colors
#'
#' `uwCol`: University of Washington color palette
#' (http://www.washington.edu/brand/graphic-elements/primary-color-palette/)
#'
#' @keywords internal
#' @noRd
uwCol <- c(
  purple = "#4b2e83",
  gold = "#b7a57a",
  metallicGold = "#85754d"
)


# Johns Hopkins ----
#' Johns Hopkins University Colors
#'
#' `jhuCol`: Johns Hopkins University color palette
#' (https://brand.jhu.edu/color/)
#'
#' @keywords internal
#' @noRd
jhuCol <- c(
  heritageBlue = "#002d72",
  spiritBlue = "#68ace5",
  orange = "#cf4520",
  maroon = "#76232f",
  pink = "#a15a95",
  green = "#009b77",
  blue = "#0072ce",
  yellow = "#f1c400",
  pms7407c = "#cba052",
  pms1375c = "#ff9e1b",
  pms1505c = "#ff6900",
  pms7586c = "#9e5330",
  pms4625c = "#4f2c1d",
  pms486c = "#e8927c",
  pms187c = "#a6192e",
  pms262c = "#51284f",
  pms666c = "#a192b2",
  pms279c = "#418fde",
  pms564c = "#86c8bc",
  pms7734c = "#286140",
  pms7490c = "#719949"
)


# NYU ----
#' NYU Colors
#'
#' `nyuCol`: NYU color palette
#' (https://www.nyu.edu/employees/resources-and-services/media-and-communications/styleguide/website/graphic-visual-design.html)
#'
#' @keywords internal
#' @noRd
nyuCol <- c(
  brightPurple = "#8900e1",
  nyuPurple = "#57068c",
  darkerPurple = "#330662",
  darkestPurple = "#220337",
  mediumGray = "#6d6d6d",
  lightGray = "#b8b8b8",
  lighterGray = "#d6d6d6",
  lightestGray = "#f2f2f2",
  red = "#cb0200",
  orange = "#e86c00",
  green = "#489141",
  blue = "#28619e",
  lightBlue = "#3dbbdb",
  accentGreen = "#007c70",
  brightRed = "#d71e5e",
  brightOrange = "#e86c00",
  yellow = "#ffc107"
)

# WashU ----
#' Washington University St Louis Colors
#'
#' `washuCol`: WashU color palette
#' (https://marcomm.wustl.edu/resources/branding-logo-toolkit/color-palettes/)
#'
#' @keywords internal
#' @noRd
washuCol <- c(
  red = "#a51417",
  gray = "#6c7373",
  lightGray = "#c8c8c8",
  darkGray = "#3d3d3d",
  extraLightGRAY = "#eeeeee",
  green = "#007360",
  darkGreen = "#173e3a",
  lightGreen = "#789b4a",
  tan = "#e1c4ac",
  darkBlue = "#172752",
  blue = "#005f85",
  pearl = "#d8d2c5",
  yellow = "#ffcc00",
  orange = "#d15f27",
  darkOrange = "#b85323",
  purple = "#622466",
  lightTurqoise = "#67c8c7",
  turqoise = "#2b8282"
)


# Chicago ----
#' U Chicago Colors
#'
#' `chicagoCol`: University of Chicago color palette
#' (https://news.uchicago.edu/sites/default/files/attachments/_uchicago.identity.guidelines.pdf)
#'
#' @keywords internal
#' @noRd
chicagoCol <- c(
  maroon = "#800000",
  darkGray = "#767676",
  lightGray = "#D6D6CE",
  yellowTint = "#FFB547",
  yellowCore = "#FFA319",
  yellowShade = "#C68220",
  orangeTint = "#D49464",
  orangeCore = "#C16622",
  orangeShade = "#9A5324",
  redTint = "#B1746F",
  redCore = "#8F3931",
  redShade = "#642822",
  lightGreenTint = "#ADB17D",
  lightGreenCore = "#8A9045",
  lightGreenShade = "#616530",
  darkGreenTint = "#8A8B79",
  darkGreenCore = "#58593F",
  darkGreenShade = "#3E3E23",
  blueTint = "#5B8FA8",
  blueCore = "#155F83",
  blueShade = "#0F425C",
  violetTint = "#725663",
  violetCore = "#350E20",
  cyan = "#47B5FF",
  magenta = "#FF3399"
)


# Penn State ----
#' Penn State Colors
#'
#' `texasCol`: Penn State color palette
#' (https://brand.psu.edu/design-essentials.html#color)
#'
#' @keywords internal
#' @noRd
pennstateCol <- c(
  nittanyNavy = "#001E44",
  beaverBlue = "#1E407C",
  pennsylvaniaSky = "#009CDE",
  limestone = "#91959C",
  creek = "#3EA39E",
  slate = "#314D64",
  pennsForest = "#4A7729",
  oldCoaly = "#54585A",
  landGrant = "#6A3028",
  lionsRoar = "#BF8226",
  lionShrine = "#B88965",
  statelyAtherton = "#AC8DCE",
  pughBlue = "#96BEE6",
  original1887 = "#BC204B",
  brightkeystone = "#FFD100",
  inventOrange = "#E98300",
  dawnOfDiscovery = "#F2665E",
  perpetualWonder = "#491D70",
  greenOpportunity = "#008755",
  futuresCalling = "#99CC00"
)


# SFSU ----
#' SF State
#'
#' `sfsuCol`: SF State color palette
#' (https://logo.sfsu.edu/color-system)
#'
#' @keywords internal
#' @noRd
sfsuCol <- c(
  `2755C` = "#231161",
  `2755C_85pc` = "#463077",
  `117C` = "#C99700",
  `117C_60pc` = "#E9D597",
  `3025C` = "#004F71",
  `383C` = "#ABAD00",
  `7419C` = "#B04A5A",
  `484C` = "#9A3324",
  coolGray11 = "#53565A"
)

# U Illinois ----
#' University of Illinois Colors
#'
#' `illinoisCol`: University of Illinois color palette
#' (https://www.uillinois.edu/OUR/brand/color_palettes)
#'
#' @keywords internal
#' @noRd
illinoisCol <- c(
  uofiblue = "#13294b",
  urbanaOrange = "#E84A27",
  uicRed = "#D50032",
  uisBlue = "#003366",
  teal = "#0d605e",
  grayBlue = "#6fafc7",
  citron = "#bfd46d",
  darkYellow = "#ffd125",
  salmon = "#ee5e5e",
  periwinkle = "#4f6898",
  gray = "#E8E9EA",
  coolGray6 = "#A5A8AA",
  coolGray1 = "#5E6669",
  secondaryBlue1 = "#0455A4",
  secondaryBlue2 = "#1F4096"
)

# U Maryland ----
#' University of Maryland Colors
#'
#' `umdCol`: University of Maryland color palette
#' (https://osc.umd.edu/licensing-trademarks/brand-standards/logos/#color)
#'
#' @keywords internal
#' @noRd
umdCol <- c(
  umdRed = "#E21833",
  umdYellow = "#ffd200",
  umdBrown = "#AD7C59"
)

# MSU ----
#' Michigan State University Colors
#'
#' `msuCol`: MSU color palette
#' (https://brand.msu.edu/visual/color-palette)
#'
#' @keywords internal
#' @noRd
msuCol <- c(
  SpartanGreen = "#18453B",
  White = "#FFFFFF",
  Black = "#000000",
  KellyGreen = "#008208",
  LimeGreen = "#7BBD00",
  ExcellenceGreen = "#0B9A6D"
)

# Michigan ----
#' University of Michigan Colors
#'
#' `michiganCol`: Michigan color palette
#' (https://brand.umich.edu/design-resources/colors/)
#'
#' @keywords internal
#' @noRd
michiganCol <- c(
  Maize = "#FFCB05",
  Blue = "#00274C",
  TappanRed = "#9A3324",
  RossOrange = "#D86018",
  RackhamGreen = "#75988d",
  WaveFieldGreen = "#A5A508",
  TaubmanTeal = "#00B2A9",
  ArboretumBlue = "#2F65A7",
  A2Amethyst = "#702082",
  MatthaeiViolet = "#575294",
  UMMATan = "#CFC096",
  BurtonTowerBeige = "#9B9A6D",
  AngelHallAsh = "#989C97",
  LawQuadStone = "#655A52",
  PumaBlack = "#131516"
)

# Iowa ----
#' Univeristy of Iowa Colors
#'
#' `iowaCol`: University of Iowa color palette
#' (https://brand.uiowa.edu/color)
#'
#' @keywords internal
#' @noRd
iowaCol <- c(
  Gold = "#FFCD00",
  Gray = "#BBBCBC",
  Blue = "#00A9E0",
  Green = "#00AF66",
  Orange = "#FF8200",
  DarkGray = "#63666A",
  DarkBlue = "#00558C",
  DarkGreen = "#00664F",
  DarkOrange = "#BD472A"
)

# U Texas ----
#' U Texas Colors
#'
#' `texasCol`: University of Texas color palette
#' (https://brand.utexas.edu/identity/color/)
#'
#' @keywords internal
#' @noRd
texasCol <- c(
  burntOrange = "#bf5700",
  gray = "#333f48",
  brightOrange = "#f8971f",
  yellow = "#ffd600",
  lightGreen = "#a6cd57",
  green = "#579d42",
  teal = "#00a9b7",
  blue = "#005f86",
  lightBlue = "#9cadb7",
  stone = "#d6d2c4"
)


# Emory ----
#' Emory Colors
#'
#' `emoryCol`: Emory color palette
#' (https://brand.emory.edu/color.html)
#'
#' @keywords internal
#' @noRd
emoryCol <- c(
  emoryBlue = "#012169",
  darkBlue = "#0c2340",
  mediumBlue = "#0033a0",
  lightBlue = "#007dba",
  yellow = "#f2a900",
  gold = "#b58500",
  metallicGold = "#84754e",
  cyan = "#00aeef",
  skyBlue = "#41b6e6",
  teal = "#487f84",
  kellyGreen = "#348338",
  seaGreen = " #006c5b",
  olive = "#5c8118",
  orange = "#c35413",
  red = "#da291c",
  magenta = "#c6007e",
  purple = "#6558b1",
  grape = "#6d2077",
  black = "#101820",
  coolGray5 = "#b1b3b3",
  coolGray2 = "#d0d0ce",
  coolGray1 = "#d9d9d6"
)

# Georgia Tech ----
#' Georgia Tech Colors
#'
#' `techCol`: Georgia Tech color palette
#' (http://www.licensing.gatech.edu/super-block/239)
#'
#' @keywords internal
#' @noRd
techCol <- c(
  techGold = "#B3A369",
  buzzGold = "#EAAA00",
  blue = "#00263A"
)


# Vanderbilt ----
#' Vanderbilt Color
#'
#' `vanderbiltCol`: Vanderbilt color palette
#' (https://www.vanderbilt.edu/communications/brand/color.php)
#'
#' @keywords internal
#' @noRd
vanderbiltCol <- c(
  gold = "#D8AB4C",
  blue = "#006682",
  red = "#993D1B",
  darkGray = "#333333",
  green = "#464E21",
  lightBlue = "#CCE0E6",
  lightRed = "#EBD8D1",
  lightGray = "#DDDDDD",
  lightGreen = "#DADCD3"
)

# Jefferson ----
#' Jefferson University Colors
#'
#' `jeffersonCol`: Jefferson color palette (http://creative.jefferson.edu/downloads/Jefferson-Brand-Guidelines.pdf)
#'
#' @keywords internal
#' @noRd
jeffersonCol <- c(
  jeffDeepBlue = "#152456",
  jeffBrightBlue = "#59B7df",
  legacyMaroon = "#9f2943",
  red = "#e53e30",
  voltGreen = "#ece819",
  silver = "#dfe1df",
  darkGray = "#8e9089",
  black = "#231f20"
)

# Hawaii ----
#' University of Hawaii Colors
#'
#' `hawaiiCol`: University of Hawaii color palette (https://www.hawaii.edu/offices/eaur/graphicsstandards.pdf)
#'
#' @keywords internal
#' @noRd
hawaiiCol <- c(
  manoa = "#024731",
  hilo = "#DA291C",
  westOahu = "#A71930",
  hawaiiCC = "#91004B",
  honoluluCC = "#00747A",
  kapiolaniCC = "#002395",
  kauaiCC = "#716FB3",
  leeward = "#3D7EDB",
  mauiCC = "#005172",
  windward = "#7AB800",
  system = "#B3995D"
)

# NIH ----
#' NIH Colors
#'
#' `nihCol`: NIH color palette (https://www.nlm.nih.gov/about/nlm_logo_guidelines_030414_508.pdf)
#'
#' @keywords internal
#' @noRd
nihCol <- c(
  blue = "#20558a",
  gray = "#616265"
)

# Imperial ----
#' Imperial Colours
#'
#' `imperialCol`: Imperial College London colour palette
#' (https://www.imperial.ac.uk/brand-style-guide/visual-identity/brand-colours/)
#'
#' @keywords internal
#' @noRd
imperialCol <- c(
  navy = "#002147",
  imperialBlue = "#003E74",
  lightGrey = "#EBEEEE",
  coolGrey = "#9D9D9D",
  lightBlue = "#D4EFFC",
  blue = "#006EAF",
  processBlue = "#0091D4",
  poolBlue = "#00ACD7",
  darkTeal = "#0F8291",
  teal = "#009CBC",
  seaglass = "#379f9f",
  darkGreen = "#02893B",
  kermitGreen = "#66A40A",
  lime = "#BBCE00",
  orange = "#D24000",
  tangerine = "#EC7300",
  lemonYellow = "#FFDD00",
  brick = "#A51900",
  red = "#DD2501",
  cherry = "#E40043",
  raspberry = "#9F004E",
  magentaPink = "#C81E78",
  iris = "#751E66",
  violet = "#960078",
  plum = "#321E6D",
  purple = "#653098"
)

# UCL ----
#' UCL Colours
#'
#' `uclCol`: UCL colour palette (https://www.ucl.ac.uk/cam/brand/guidelines/colour)
#'
#' @keywords internal
#' @noRd
uclCol <- c(
  darkGreen = "#555025",
  darkRed = "#651D32",
  darkPurple = "#4B384C",
  darkBlue = "#003D4C",
  darkBrown = "#4E3629",
  midGreen = "#8F993E",
  midRed = "#93272C",
  midPurple = "#500778",
  midBlue = "#002855",
  stone = "#D6D2C4",
  brightGreen = "#B5BD00",
  brightRed = "#D50032",
  brightBlue = "#0097A9",
  brightPink = "#AC145A",
  lightGreen = "#BBC592",
  lightRed = "#E03C31",
  lightPurple = "#C6B0BC",
  lightBlue = "#8DB9CA",
  yellow = "#F6BE00",
  orange = "#EA7600",
  grey = "#8C8279",
  blueCeleste = "#A4DBE8",
  IOEblue = "#24509A"
)

# Oxford ----
#' Oxford Colours
#'
#' `oxfordCol`: Oxford University colour palette (https://www.ox.ac.uk/sites/files/oxford/media_wysiwyg/Oxford%20Blue%20LR.pdf)
#'
#' @keywords internal
#' @noRd
oxfordCol <- c(
  oxfordBlue = "#002147",
  pantone279 = "#4891DC",
  pantone291 = "#9ECEEB",
  pantone5405 = "#44687D",
  pantone549 = "#5F9BAF",
  pantone551 = "#A1C4D0",
  pantone562 = "#007770",
  pantone624 = "#7BA296",
  pantone559 = "#BCD2C3",
  pantone576 = "#69913B",
  pantone578 = "#B9CF96",
  pantone580 = "#CEDBAF",
  pantone583 = "#AAB300",
  pantone585 = "#DBDE72",
  pantone587 = "#E3E597",
  pantone7412 = "#CF7A30",
  pantone129 = "#F5CF47",
  pantone127 = "#F3DE74",
  pantone202 = "#872434",
  pantone200 = "#BE0F34",
  pantone196 = "#EBC4CB",
  pantoneWarmGray6 = "#A79D96",
  pantoneWarmGray3 = "#C7C2BC",
  pantoneWarmGray1 = "#E0DED9"
)


# NHS ----
#' NHS Colours
#'
#' `nhsCol`: NHS colour palette (https://www.england.nhs.uk/nhsidentity/identity-guidelines/colours/)
#'
#' @keywords internal
#' @noRd
nhsCol <- c(
  nhsDarkBlue = "#003087",
  nhsBlue = "#005EB8",
  nhsBrightBlue = "#0072CE",
  nhsLightBlue = "#41B6E6",
  nhsAquaBlue = "#00A9CE",
  nhsBlack = "#231f20",
  nhsDarkGrey = "#425563",
  nhsMidGrey = "#768692",
  nhsPaleGrey = "#E8EDEE",
  nhsDarkGreen = "#006747",
  nhsGreen = "#009639",
  nhsLightGreen = "#78BE20",
  nhsAquaGreen = "#00A499",
  nhsPurple = "#330072",
  nhsDarkPink = "#7C2855",
  nhsPink = "#AE2573",
  nhsDarkRed = "#8A1538",
  emergencyServicesRed = "#DA291C",
  nhsOrange = "#ED8B00",
  nhsWarmYellow = "#FFB81C",
  nhsYellow = "#FAE100"
)

# UBC ----
#' UBC Colors
#'
#' `ubcCol`: UBC color palette (http://assets.brand.ubc.ca/downloads/ubc_colour_guide.pdf)
#'
#' @keywords internal
#' @noRd
ubcCol <- c(
  ubcBlue = "#002145",
  blue2 = "#0055B7",
  blue3 = "#00A7E1",
  blue4 = "#40B4E5",
  blue5 = "#6EC4E8",
  blue6 = "#97D4E9"
)

# U Toronto ----
#' U Toronto Colors
#'
#' `torontoCol`: U Toronto color palette (https://trademarks.utoronto.ca/colors-fonts/)
#'
#' @keywords internal
#' @noRd
torontoCol <- c(
  blue = "#002043",
  red = "#bb133e"
)

# McGill ----
#' McGill Colors
#'
#' `mcgillCol`: McGill color palette (https://www.mcgill.ca/visual-identity/visual-identity-guide)
#'
#' @keywords internal
#' @noRd
mcgillCol <- c(
  mcgillRed = "#ED1B2F",
  grey = "#5D6770",
  pastelOrange = "#FFD794",
  brightOrange = "#F7941D",
  mutedOrange = "#D3674A",
  darkOrange = "#AA4B31",
  pastelYellow = "#FFF193",
  brightYellow = "#FFD400",
  mutedYellow = "#E8B92E",
  darkYellow = "#B28C35",
  pastelTeal = "#B5E1E1",
  brightTeal = "#27BDBE",
  mutedTeal = "#087F8C",
  darkTeal = "#0A6266",
  pastelBlue = "#C8EAF5",
  brightBlue = "#44C8F5",
  mutedBlue = "#0096C9",
  darkBlue = "#024F6D",
  pastelGreen = "#D5E6A8",
  brightGreen = "#B2D235",
  mutedGreen = "#8BA04E",
  darkGreen = "#305534",
  pastelPink = "#E2A7CC",
  brightPink = "#C768A9",
  mutedPink = "#9B5678",
  darkPink = "#673567",
  darkRed = "#9E0918"
)

# ETH ----
#' ETH Colours
#'
#' `ethCol`: ETH color palette (https://ethz.ch/services/en/service/communication/corporate-design/colour.html)
#'
#' @keywords internal
#' @noRd
ethCol <- c(
  eth1 = "#1F407A",
  eth2 = "#3C5A0F",
  eth3 = "#0069B4",
  eth4 = "#72791C",
  eth5 = "#91056A",
  eth6 = "#6F6F6E",
  eth7 = "#A8322D",
  eth8 = "#007A92",
  eth9 = "#956013",
  eth10 = "#82BE1E"
)

# RWTH Aachen ----
#' RWTH Aachen Colours
#'
#' `rwthCol`: RWTH Aachen color palette (http://www9.rwth-aachen.de/global/show_document.asp?id=aaaaaaaaaadpbhq)
#'
#' @keywords internal
#' @noRd
rwthCol <- c(
  blau1 = "#00549F",
  blau2 = "#407FB7",
  blau3 = "#8EBAE5",
  blau4 = "#C7DDF2",
  blau5 = "#E8F1FA",
  magenta1 = "#E30066",
  magenta2 = "#E96088",
  magenta3 = "#F19EB1",
  magenta4 = "#F9D2DA",
  magenta5 = "#FDEEF0",
  gelb1 = "#FFED00",
  gelb2 = "#FFF055",
  gelb3 = "#FFF59B",
  gelb4 = "#FFFAD1",
  gelb5 = "#FFFDEE",
  petrol1 = "#006165",
  petrol2 = "#2D7F83",
  petrol3 = "#7DA4A7",
  petrol4 = "#BFD0D1",
  petrol5 = "#E6ECEC",
  tuerkis1 = "#0098A1",
  tuerkis2 = "#00B1B7",
  tuerkis3 = "#89CCCF",
  tuerkis4 = "#CAE7E7",
  tuerkis5 = "#EBF6F6",
  gruen1 = "#57AB27",
  gruen2 = "#8DC060",
  gruen3 = "#B8D698",
  gruen4 = "#DDEBCE",
  gruen5 = "#F2F7EC",
  maigruen1 = "#BDCD00",
  maigruen2 = "#D0D95C",
  maigruen3 = "#E0E69A",
  maigruen4 = "#F0F3D0",
  maigruen5 = "#F9FAED",
  orange1 = "#F6A800",
  orange2 = "#FABE50",
  orange3 = "#FDD48F",
  orange4 = "#FEEAC9",
  orange5 = "#FFF7EA",
  rot1 = "#CC071E",
  rot2 = "#D85C41",
  rot3 = "#E69679",
  rot4 = "#F3CDBB",
  rot5 = "#FAEBE3",
  bordeaux1 = "#A11035",
  bordeaux2 = "#B65256",
  bordeaux3 = "#CD8B87",
  bordeaux4 = "#E5C5C0",
  bordeaux5 = "#F5E8E5",
  violett1 = "#612158",
  violett2 = "#834E75",
  violett3 = "#A8859E",
  violett4 = "#D2C0CD",
  violett5 = "#EDE5EA",
  lila1 = "#7A6FAC",
  lila2 = "#9B91C1",
  lila3 = "#BCB5D7",
  lila4 = "#DEDAEB",
  lila5 = "#F2F0F7"
)

# Mozilla ----
#' Mozilla Colors
#'
#' `mozillaCol`: Mozilla design colors
#' (https://mozilla.design/mozilla/color/)
#'
#' @keywords internal
#' @noRd
mozillaCol <- c(
  neonBlue = "#00ffff",
  lemonYellow = "#fff44f",
  warmRed = "#ff4f5e",
  neonGreen = "#54ffbd",
  darkPurple = "#6e008b",
  darkGreen = "#005e5e",
  darkBlue = "#00458b",
  lightGrey = "#e7e5e2"
)

# Firefox ----
#' Firefox Colors
#'
#' `firefoxCol`: Firefox design colors
#' (https://mozilla.design/firefox/color/)
#'
#' @keywords internal
#' @noRd
firefoxCol <- c(
  Green = "#53FEBE",
  Blue = "#0290EE",
  Purple = "#AC71FF",
  Light_Purple = "#D64CF1",
  Magenta = "#FE4AA3",
  Salmon = "#FF6A75",
  Orange = "#FE8A4F",
  Yellow = "#FFBD4F"
)

# Apple ----
#' Apple Colors
#'
#' `appleCol`: Apple Human Interface Guidelines color palette
#' (https://developer.apple.com/design/human-interface-guidelines/ios/visual-design/color/)
#'
#' @keywords internal
#' @noRd
appleCol <- c(
  red = "#FF3B30",
  orange = "#FF9500",
  yellow = "#FFCC00",
  green = "#4CD964",
  tealBlue = "#5AC8FA",
  blue = "#007AFF",
  purple = "#5856D6",
  pink = "#FF2D55"
)


# Google ----
#' Google Colors
#'
#' `googleCol`: Google brand palette (https://brandpalettes.com/google-colors/)
#'
#' @keywords internal
#' @noRd
googleCol <- c(
  blue = "#4285F4",
  red = "#DB4437",
  yellow = "#F4B400",
  green = "#0F9D58"
)

# Amazon ----
#' Amazon Colors
#'
#' `amazonCol`: Amazon brand palette
#' (https://images-na.ssl-images-amazon.com/images/G/01/AdvertisingSite/pdfs/AmazonBrandUsageGuidelines.pdf)
#'
#' @keywords internal
#' @noRd
amazonCol <- c(
  orange = "#FF9900",
  blue = "#146EB4"
)

# Microsoft ----
#' Microsoft Colors
#'
#' `microsoftCol`: Microsoft brand palette
#' (https://brandcolors.net/b/microsoft)
#'
#' @keywords internal
#' @noRd
microsoftCol <- c(
  orange = "#f65314",
  green = "#7cbb00",
  blue = "#00a1f1",
  yellow = "#ffbb00"
)

# rtemis palettes ----
rtCol1 <- desaturate(
  c(
    ucsfCol[["C3_Teal"]],
    ucsfCol[["M3_Orange"]],
    pennCol[["lighterRed"]],
    pennCol[["lighterBlue"]],
    pennCol[["lighterOrange"]],
    pennCol[["lighterPurple"]],
    ucsfCol[["A3_CTA_Blue"]],
    pennCol[["lightestOrange"]],
    pennCol[["lightestPurple"]],
    pennCol[["blue"]],
    pennCol[["red"]],
    pennCol[["orange"]],
    pennCol[["purple"]]
  ),
  .3
)

rtCol1n <- desaturate(
  c(
    ucsfCol[["C3_Teal"]],
    ucsfCol[["M3_Orange"]],
    pennCol[["lighterBlue"]],
    pennCol[["lighterRed"]],
    pennCol[["lighterOrange"]],
    pennCol[["lighterPurple"]],
    pennCol[["lightestBlue"]],
    ucsfCol[["G4"]],
    pennCol[["lightestOrange"]],
    pennCol[["lightestPurple"]],
    pennCol[["blue"]],
    pennCol[["red"]],
    pennCol[["orange"]],
    pennCol[["purple"]]
  ),
  .3
)

rtCol2 <- c(
  ucsfCol[["C3_Teal"]],
  ucsfCol[["M3_Orange"]],
  ucsfCol[["H2"]],
  ucsfCol[["A2"]],
  ucsfCol[["C4"]],
  ucsfCol[["L3_Yellow"]],
  ucsfCol[["H3_Magenta"]],
  ucsfCol[["A3_CTA_Blue"]]
)

rtms <- c(
  teal = ucsfCol[["C3_Teal"]],
  orange = ucsfCol[["M3_Orange"]],
  red = pennCol[["lighterRed"]],
  blue = pennCol[["lighterBlue"]],
  lighter_teal = ucsfCol[["C4"]],
  yellow = ucsfCol[["L3_Yellow"]],
  magenta = ucsfCol[["H3_Magenta"]],
  lighter_blue = ucsfCol[["A3_CTA_Blue"]]
) |>
  desaturate()

rtcoldev <- c(
  rtemisblue = "#80ffff",
  rtemisbluetoo = "#00D6FF",
  lavender = "#ff80ffff",
  orange = "#ffb200ff"
)

grays <- c(
  Gray10 = "gray10",
  Gray30 = "gray30",
  Gray50 = "gray50",
  Gray70 = "gray70",
  Gray90 = "gray90"
)

# Pantone 2022 ----
pantoneBalancingAct <- c(
  Granite_Green = "#86A293",
  Muted_Clay = "#D29381",
  Very_Peri = "#6667AB",
  Hawthorne_Rose = "#884C5E",
  Dried_Moss = "#CCB97E",
  Elderberry = "#9D848E",
  Lotus = "#E3C1C0",
  Burnished_Lilac = "#C4AEB1"
)

pantoneWellspring <- c(
  Eggshell_Blue = "#A1CAC9",
  Celery = "#CFBF54",
  Dewberry = "#8C5896",
  Chai_tea = "#B3832F",
  Greenbrier = "#48996B",
  Very_Peri = "#6667AB",
  Treetop = "#436A2F",
  Foliage = "#759F51"
)

pantoneAmusements <- c(
  Tawny_Orange = "#D77E6F",
  Very_Peri = "#6667AB",
  Iced_Coffee = "#B38F6A",
  Pink_Flambe = "#D75078",
  Fuchsia_Pink = "#E288B6",
  Paradise_Pink = "#E9445D",
  Cornsilk = "#EEC272",
  Tourmaline = "#85A0A9"
)

# rtemis_palettes ----
rtemis_palettes <- list(
  ucsfCol = ucsfCol,
  pennCol = pennCol,
  imperialCol = imperialCol,
  stanfordCol = stanfordCol,
  ucdCol = ucdCol,
  berkeleyCol = berkeleyCol,
  ucscCol = ucscCol,
  ucmercedCol = ucmercedCol,
  ucsbCol = ucsbCol,
  uclaCol = uclaCol,
  ucrColor = ucrColor,
  uciCol = uciCol,
  ucsdCol = ucsdCol,
  ucCol = ucCol,
  scrippsCol = scrippsCol,
  caltechCol = caltechCol,
  cmuCol = cmuCol,
  princetonCol = princetonCol,
  columbiaCol = columbiaCol,
  yaleCol = yaleCol,
  brownCol = brownCol,
  cornellCol = cornellCol,
  hmsCol = hmsCol,
  dartmouthCol = dartmouthCol,
  usfCol = usfCol,
  uwCol = uwCol,
  jhuCol = jhuCol,
  nyuCol = nyuCol,
  washuCol = washuCol,
  chicagoCol = chicagoCol,
  pennstateCol = pennstateCol,
  msuCol = msuCol,
  michiganCol = michiganCol,
  iowaCol = iowaCol,
  texasCol = texasCol,
  techCol = techCol,
  jeffersonCol = jeffersonCol,
  hawaiiCol = hawaiiCol,
  nihCol = nihCol,
  torontoCol = torontoCol,
  mcgillCol = mcgillCol,
  uclCol = uclCol,
  oxfordCol = oxfordCol,
  nhsCol = nhsCol,
  ethCol = ethCol,
  rwthCol = rwthCol,
  firefoxCol = firefoxCol,
  mozillaCol = mozillaCol,
  appleCol = appleCol,
  googleCol = googleCol,
  amazonCol = amazonCol,
  microsoftCol = microsoftCol,
  pantoneBalancingAct = pantoneBalancingAct,
  pantoneWellspring = pantoneWellspring,
  pantoneAmusements = pantoneAmusements,
  grays = grays,
  rtCol1 = rtCol1,
  rtms = rtms
)

#' Get Color Palette
#'
#' `get_palette()` returns a color palette (character vector of colors).
#' Without arguments, prints names of available color palettes.
#' Each palette is a named list of hexadecimal color definitions which can be used with
#' any graphics function.
#'
#' @param palette Character: Name of palette to return. Default = NULL: available palette
#' names are printed and no palette is returned.
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character vector of colors for the specified palette, or invisibly returns
#' list of available palettes if `palette = NULL`.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # Print available palettes
#' get_palette()
#' # Get the Imperial palette
#' get_palette("imperial")
get_palette <- function(palette = NULL, verbosity = 1L) {
  if (is.null(palette)) {
    if (verbosity > 0L) {
      msg(highlight("The following palettes are available:"))
      print(names(rtemis_palettes))
    }
  } else {
    palette <- match.arg(
      palette,
      names(rtemis_palettes)
    )
    rtemis_palettes[[palette]]
  }
} # /rtemis::get_palette


# Xterm Colors ----
XtermCol <- c(
  `Black (SYSTEM)` = "#000000",
  `Maroon (SYSTEM)` = "#800000",
  `Green (SYSTEM)` = "#008000",
  `Olive (SYSTEM)` = "#808000",
  `Navy (SYSTEM)` = "#000080",
  `Purple (SYSTEM)` = "#800080",
  `Teal (SYSTEM)` = "#008080",
  `Silver (SYSTEM)` = "#c0c0c0",
  `Grey (SYSTEM)` = "#808080",
  `Red (SYSTEM)` = "#ff0000",
  `Lime (SYSTEM)` = "#00ff00",
  `Yellow (SYSTEM)` = "#ffff00",
  `Blue (SYSTEM)` = "#0000ff",
  `Fuchsia (SYSTEM)` = "#ff00ff",
  `Aqua (SYSTEM)` = "#00ffff",
  `White (SYSTEM)` = "#ffffff",
  Grey0 = "#000000",
  NavyBlue = "#00005f",
  DarkBlue = "#000087",
  Blue3 = "#0000af",
  Blue3 = "#0000d7",
  Blue1 = "#0000ff",
  DarkGreen = "#005f00",
  DeepSkyBlue4 = "#005f5f",
  DeepSkyBlue4 = "#005f87",
  DeepSkyBlue4 = "#005faf",
  DodgerBlue3 = "#005fd7",
  DodgerBlue2 = "#005fff",
  Green4 = "#008700",
  SpringGreen4 = "#00875f",
  Turquoise4 = "#008787",
  DeepSkyBlue3 = "#0087af",
  DeepSkyBlue3 = "#0087d7",
  DodgerBlue1 = "#0087ff",
  Green3 = "#00af00",
  SpringGreen3 = "#00af5f",
  DarkCyan = "#00af87",
  LightSeaGreen = "#00afaf",
  DeepSkyBlue2 = "#00afd7",
  DeepSkyBlue1 = "#00afff",
  Green3 = "#00d700",
  SpringGreen3 = "#00d75f",
  SpringGreen2 = "#00d787",
  Cyan3 = "#00d7af",
  DarkTurquoise = "#00d7d7",
  Turquoise2 = "#00d7ff",
  Green1 = "#00ff00",
  SpringGreen2 = "#00ff5f",
  SpringGreen1 = "#00ff87",
  MediumSpringGreen = "#00ffaf",
  Cyan2 = "#00ffd7",
  Cyan1 = "#00ffff",
  DarkRed = "#5f0000",
  DeepPink4 = "#5f005f",
  Purple4 = "#5f0087",
  Purple4 = "#5f00af",
  Purple3 = "#5f00d7",
  BlueViolet = "#5f00ff",
  Orange4 = "#5f5f00",
  Grey37 = "#5f5f5f",
  MediumPurple4 = "#5f5f87",
  SlateBlue3 = "#5f5faf",
  SlateBlue3 = "#5f5fd7",
  RoyalBlue1 = "#5f5fff",
  Chartreuse4 = "#5f8700",
  DarkSeaGreen4 = "#5f875f",
  PaleTurquoise4 = "#5f8787",
  SteelBlue = "#5f87af",
  SteelBlue3 = "#5f87d7",
  CornflowerBlue = "#5f87ff",
  Chartreuse3 = "#5faf00",
  DarkSeaGreen4 = "#5faf5f",
  CadetBlue = "#5faf87",
  CadetBlue = "#5fafaf",
  SkyBlue3 = "#5fafd7",
  SteelBlue1 = "#5fafff",
  Chartreuse3 = "#5fd700",
  PaleGreen3 = "#5fd75f",
  SeaGreen3 = "#5fd787",
  Aquamarine3 = "#5fd7af",
  MediumTurquoise = "#5fd7d7",
  SteelBlue1 = "#5fd7ff",
  Chartreuse2 = "#5fff00",
  SeaGreen2 = "#5fff5f",
  SeaGreen1 = "#5fff87",
  SeaGreen1 = "#5fffaf",
  Aquamarine1 = "#5fffd7",
  DarkSlateGray2 = "#5fffff",
  DarkRed = "#870000",
  DeepPink4 = "#87005f",
  DarkMagenta = "#870087",
  DarkMagenta = "#8700af",
  DarkViolet = "#8700d7",
  Purple = "#8700ff",
  Orange4 = "#875f00",
  LightPink4 = "#875f5f",
  Plum4 = "#875f87",
  MediumPurple3 = "#875faf",
  MediumPurple3 = "#875fd7",
  SlateBlue1 = "#875fff",
  Yellow4 = "#878700",
  Wheat4 = "#87875f",
  Grey53 = "#878787",
  LightSlateGrey = "#8787af",
  MediumPurple = "#8787d7",
  LightSlateBlue = "#8787ff",
  Yellow4 = "#87af00",
  DarkOliveGreen3 = "#87af5f",
  DarkSeaGreen = "#87af87",
  LightSkyBlue3 = "#87afaf",
  LightSkyBlue3 = "#87afd7",
  SkyBlue2 = "#87afff",
  Chartreuse2 = "#87d700",
  DarkOliveGreen3 = "#87d75f",
  PaleGreen3 = "#87d787",
  DarkSeaGreen3 = "#87d7af",
  DarkSlateGray3 = "#87d7d7",
  SkyBlue1 = "#87d7ff",
  Chartreuse1 = "#87ff00",
  LightGreen = "#87ff5f",
  LightGreen = "#87ff87",
  PaleGreen1 = "#87ffaf",
  Aquamarine1 = "#87ffd7",
  DarkSlateGray1 = "#87ffff",
  Red3 = "#af0000",
  DeepPink4 = "#af005f",
  MediumVioletRed = "#af0087",
  Magenta3 = "#af00af",
  DarkViolet = "#af00d7",
  Purple = "#af00ff",
  DarkOrange3 = "#af5f00",
  IndianRed = "#af5f5f",
  HotPink3 = "#af5f87",
  MediumOrchid3 = "#af5faf",
  MediumOrchid = "#af5fd7",
  MediumPurple2 = "#af5fff",
  DarkGoldenrod = "#af8700",
  LightSalmon3 = "#af875f",
  RosyBrown = "#af8787",
  Grey63 = "#af87af",
  MediumPurple2 = "#af87d7",
  MediumPurple1 = "#af87ff",
  Gold3 = "#afaf00",
  DarkKhaki = "#afaf5f",
  NavajoWhite3 = "#afaf87",
  Grey69 = "#afafaf",
  LightSteelBlue3 = "#afafd7",
  LightSteelBlue = "#afafff",
  Yellow3 = "#afd700",
  DarkOliveGreen3 = "#afd75f",
  DarkSeaGreen3 = "#afd787",
  DarkSeaGreen2 = "#afd7af",
  LightCyan3 = "#afd7d7",
  LightSkyBlue1 = "#afd7ff",
  GreenYellow = "#afff00",
  DarkOliveGreen2 = "#afff5f",
  PaleGreen1 = "#afff87",
  DarkSeaGreen2 = "#afffaf",
  DarkSeaGreen1 = "#afffd7",
  PaleTurquoise1 = "#afffff",
  Red3 = "#d70000",
  DeepPink3 = "#d7005f",
  DeepPink3 = "#d70087",
  Magenta3 = "#d700af",
  Magenta3 = "#d700d7",
  Magenta2 = "#d700ff",
  DarkOrange3 = "#d75f00",
  IndianRed = "#d75f5f",
  HotPink3 = "#d75f87",
  HotPink2 = "#d75faf",
  Orchid = "#d75fd7",
  MediumOrchid1 = "#d75fff",
  Orange3 = "#d78700",
  LightSalmon3 = "#d7875f",
  LightPink3 = "#d78787",
  Pink3 = "#d787af",
  Plum3 = "#d787d7",
  Violet = "#d787ff",
  Gold3 = "#d7af00",
  LightGoldenrod3 = "#d7af5f",
  Tan = "#d7af87",
  MistyRose3 = "#d7afaf",
  Thistle3 = "#d7afd7",
  Plum2 = "#d7afff",
  Yellow3 = "#d7d700",
  Khaki3 = "#d7d75f",
  LightGoldenrod2 = "#d7d787",
  LightYellow3 = "#d7d7af",
  Grey84 = "#d7d7d7",
  LightSteelBlue1 = "#d7d7ff",
  Yellow2 = "#d7ff00",
  DarkOliveGreen1 = "#d7ff5f",
  DarkOliveGreen1 = "#d7ff87",
  DarkSeaGreen1 = "#d7ffaf",
  Honeydew2 = "#d7ffd7",
  LightCyan1 = "#d7ffff",
  Red1 = "#ff0000",
  DeepPink2 = "#ff005f",
  DeepPink1 = "#ff0087",
  DeepPink1 = "#ff00af",
  Magenta2 = "#ff00d7",
  Magenta1 = "#ff00ff",
  OrangeRed1 = "#ff5f00",
  IndianRed1 = "#ff5f5f",
  IndianRed1 = "#ff5f87",
  HotPink = "#ff5faf",
  HotPink = "#ff5fd7",
  MediumOrchid1 = "#ff5fff",
  DarkOrange = "#ff8700",
  Salmon1 = "#ff875f",
  LightCoral = "#ff8787",
  PaleVioletRed1 = "#ff87af",
  Orchid2 = "#ff87d7",
  Orchid1 = "#ff87ff",
  Orange1 = "#ffaf00",
  SandyBrown = "#ffaf5f",
  LightSalmon1 = "#ffaf87",
  LightPink1 = "#ffafaf",
  Pink1 = "#ffafd7",
  Plum1 = "#ffafff",
  Gold1 = "#ffd700",
  LightGoldenrod2 = "#ffd75f",
  LightGoldenrod2 = "#ffd787",
  NavajoWhite1 = "#ffd7af",
  MistyRose1 = "#ffd7d7",
  Thistle1 = "#ffd7ff",
  Yellow1 = "#ffff00",
  LightGoldenrod1 = "#ffff5f",
  Khaki1 = "#ffff87",
  Wheat1 = "#ffffaf",
  Cornsilk1 = "#ffffd7",
  Grey100 = "#ffffff",
  Grey3 = "#080808",
  Grey7 = "#121212",
  Grey11 = "#1c1c1c",
  Grey15 = "#262626",
  Grey19 = "#303030",
  Grey23 = "#3a3a3a",
  Grey27 = "#444444",
  Grey30 = "#4e4e4e",
  Grey35 = "#585858",
  Grey39 = "#626262",
  Grey42 = "#6c6c6c",
  Grey46 = "#767676",
  Grey50 = "#808080",
  Grey54 = "#8a8a8a",
  Grey58 = "#949494",
  Grey62 = "#9e9e9e",
  Grey66 = "#a8a8a8",
  Grey70 = "#b2b2b2",
  Grey74 = "#bcbcbc",
  Grey78 = "#c6c6c6",
  Grey82 = "#d0d0d0",
  Grey85 = "#dadada",
  Grey89 = "#e4e4e4",
  Grey93 = "#eeeeee"
)


================================================
FILE: R/utils_plot.R
================================================
# plotops.R
# ::rtemis::
# 2020- EDG rtemis.org

#' @keywords internal
#' @noRd
getlim <- function(x, axs = c("r", "i"), axs.r.pct = .04) {
  axs <- match.arg(axs)

  .x <- na.exclude(x)
  .min <- min(.x)
  .max <- max(.x)

  if (axs == "r") {
    .diff <- .max - .min
    c(.min - axs.r.pct * .diff, .max + axs.r.pct * .diff)
  } else {
    c(.min, .max)
  }
} # /rtemis::getlim


================================================
FILE: R/utils_plotly.R
================================================
# utils_plotly
# ::rtemis::
# 2021- EDG rtemis.org

# plotly_vline calls plotly_vline1 to create a list for one or more vertical
# lines, to be passed to plotly::layout's shapes argument
plotly_vline1 <- function(x, color = "#F48024", width = 1, dash = "dot") {
  list(
    type = "line",
    x0 = x,
    x1 = x,
    y0 = 0,
    y1 = 1,
    yref = "paper",
    line = list(
      color = color,
      width = width,
      dash = dash
    )
  )
}

# Calls plotly_vline1 for each x value
plotly_vline <- function(x, color = "#F48024", width = 1, dash = "dot") {
  color <- recycle(color, x)
  width <- recycle(width, x)
  dash <- recycle(dash, x)
  mapply(
    plotly_vline1,
    x,
    color = color,
    width = width,
    dash = dash,
    SIMPLIFY = FALSE
  )
}


# plotly_hline calls plotly_hline1 to create a list for one or more horizontal
# lines, to be passed to plotly::layout's shapes argument
plotly_hline1 <- function(y, color = "#F48024", width = 1, dash = "dot") {
  list(
    type = "line",
    x0 = 0,
    x1 = 1,
    y0 = y,
    y1 = y,
    xref = "paper",
    line = list(
      color = color,
      width = width,
      dash = dash
    )
  )
}


plotly_hline <- function(y, color = "#F48024", width = 1, dash = "dot") {
  color <- recycle(color, y)
  width <- recycle(width, y)
  dash <- recycle(dash, y)
  mapply(
    plotly_hline1,
    y,
    color = color,
    width = width,
    dash = dash,
    SIMPLIFY = FALSE
  )
}


# bracket y values for boxplot htest
bracket_y <- function(x, pad = c(.04, .05)) {
  l <- max(x) + (diff(range(x)) * pad)
  c(l, rev(l))
}


starbracket_y <- function(x, pad = c(.04, .05, .09)) {
  l <- max(x) + (diff(range(x)) * pad)
  list(star = l[3], bracket = c(l[1:2], rev(l[1:2])))
}


# plotly shade
plotly_shade <- function(
  plt,
  x,
  ypos,
  yneg,
  col,
  alpha = 1,
  legendgroup = NA,
  showlegend = FALSE
) {
  plt <- plotly::add_trace(
    plt,
    x = x,
    y = ypos,
    # type = scatter.type,
    mode = "lines",
    line = list(color = "transparent"),
    legendgroup = legendgroup,
    showlegend = showlegend,
    hoverinfo = "none",
    inherit = FALSE
  )
  plt <- plotly::add_trace(
    plt,
    x = x,
    y = yneg,
    # type = scatter.type,
    mode = "lines",
    fill = "tonexty",
    fillcolor = plotly::toRGB(col, alpha = alpha),
    line = list(color = "transparent"),
    legendgroup = legendgroup,
    showlegend = showlegend,
    hoverinfo = "none",
    inherit = FALSE
  )
}


#' Export plotly plot to file
#'
#' @param x plotly object.
#' @param filename Character: Filename to save the plot to.
#' @param width Numeric: Width of the exported image in pixels.
#' @param height Numeric: Height of the exported image in pixels.
#' @param scale Numeric: Scale factor for the exported image.
#' @param import_kaleido Logical: If TRUE, attempts to import kaleido for exporting plotly plots.
#' @param verbosity Integer: Verbosity level.
#'
#' @return NULL
#'
#' @author EDG
#' @keywords internal
#' @noRd
export_plotly <- function(
  x,
  filename,
  width = 600,
  height = 600,
  scale = 1,
  import_kaleido = TRUE,
  verbosity = 1L
) {
  # Import kaleido
  if (import_kaleido) {
    tryCatch(
      {
        reticulate::py_run_string("import kaleido")
        cat("Kaleido is available for plotly exports.\n")
      },
      error = function(e) {
        cat("Installing kaleido for plotly exports...\n")
        reticulate::py_install("kaleido")
        reticulate::py_run_string("import kaleido")
        cat("Kaleido installed successfully.\n")
      }
    )
  }

  # Intro
  if (verbosity > 0L) {
    msgstart("Exporting plotly plot to ", filename, "...")
  }

  # Export to file ----
  filename <- normalizePath(filename, mustWork = FALSE)
  # Create parent directory if it doesn't exist
  parent_dir <- dirname(filename)
  if (!dir.exists(parent_dir)) {
    dir.create(parent_dir, recursive = TRUE)
    if (!dir.exists(parent_dir)) {
      cli::cli_abort(
        "Failed to create directory {.file {parent_dir}}. Check path & permissions."
      )
    }
  }
  plotly::save_image(
    x,
    file = filename,
    width = width,
    height = height,
    scale = scale
  )

  # Check if the file was created
  if (!file.exists(filename)) {
    cli::cli_abort(
      "Failed to save plotly plot to {.file {filename}}. Check if the file path is correct and writable."
    )
  } else {
    if (verbosity > 0L) {
      msgdone()
    }
  }
} # /rtemis::export_plotly


================================================
FILE: R/utils_print.R
================================================
# print_ops.R
# ::rtemis::
# 2016-23 EDG rtemis.org

is_common_struct <- function(x) {
  class(x)[1] %in%
    c(
      "numeric",
      "integer",
      "character",
      "logical",
      "factor",
      "Date",
      "POSIXct",
      "POSIXlt",
      "list",
      "data.frame",
      "matrix",
      "array",
      "table",
      "ts",
      "tbl_df",
      "data.table"
    )
}

#' Pretty print list
#'
#' Pretty print a list (or data frame) recursively
#'
#' Data frames in R began life as lists
#'
#' @param x list or object that will be converted to a list.
#' @param prefix Character: Optional prefix for names.
#' @param pad Integer: Pad output with this many spaces.
#' @param item_format Formatting function for list item names.
#' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis.
#' @param center_title Logical: If TRUE, autopad title for centering, if present.
#' @param title Character: Optional title to print before list.
#' @param title_newline Logical: If TRUE, print title on new line.
#' @param newline_pre Logical: If TRUE, print newline before list.
#' @param format_fn_rhs Formatting function for right-hand side values.
#' @param print_class Logical: If TRUE, print abbreviated class of object.
#' @param abbrev_class_n Integer: Number of characters to abbreviate class names to.
#' @param print_df Logical: If TRUE, print data frame contents, otherwise print n rows and columns.
#' @param print_S4 Logical: If TRUE, print S4 object contents, otherwise print class name.
#' @param limit Integer: Maximum number of items to show. Use -1 for unlimited.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
printls <- function(
  x,
  prefix = "",
  pad = 2L,
  item_format = bold,
  maxlength = 4L,
  center_title = TRUE,
  title = NULL,
  title_newline = TRUE,
  newline_pre = FALSE,
  format_fn_rhs = ddSci,
  print_class = TRUE,
  abbrev_class_n = 3L,
  print_df = FALSE,
  print_S4 = FALSE,
  limit = 12L
) {
  # Arguments ----
  if (newline_pre) {
    cat("\n")
  }
  if (is.null(x)) {
    if (!is.null(title)) {
      padcat(title, pad = pad, newline = title_newline, newline_pre = FALSE)
    }
    cat(strrep(" ", pad), "NULL", sep = "")
  } else if (length(x) == 0) {
    cat(class(x), "of length 0.\n")
  } else if (is.data.frame(x) && !print_df) {
    cat(
      "data.frame with",
      NROW(x),
      "rows and",
      NCOL(x),
      "columns.\n"
    )
  } else if (!is_common_struct(x)) {
    cat("object of class:", class(x), "\n")
  } else {
    x <- as.list(x)
    # Get class of each element
    classes_ <- sapply(x, class)
    # Remove closures that will cause error
    is_fn <- which(sapply(x, is.function))
    if (length(is_fn) > 0) {
      for (i in is_fn) {
        x[[i]] <- paste0(as.character(head(deparse(x[[i]]), n = 1L)), "...")
      }
    }
    # Remove NULLs
    null_index <- sapply(x, is.null)
    x[null_index] <- "NULL"
    xnames <- names(x)
    lhs <- max(nchar(paste0(prefix, xnames))) + pad
    if (!is.null(title)) {
      title_pad <- if (center_title) {
        max(0, lhs - round((.5 * nchar(title))) - 3)
      } else {
        0
      }
      padcat(
        title,
        pad = title_pad,
        newline = title_newline,
        newline_pre = FALSE
      )
    } # /title
    counter <- 0L
    # Print each item up to limit items
    if (limit != -1L && length(x) > limit) {
      padcat(
        italic(gray(
          paste(
            "Showing first",
            limit,
            "of",
            length(x),
            "items.\n"
          )
        )),
        pad = pad
      )
    }
    for (i in seq_along(x)) {
      counter <- counter + 1L
      if (limit != -1L && counter > limit) {
        padcat(
          italic(gray(
            paste0(
              "...",
              length(x) - limit,
              " more items not shown.\n"
            )
          )),
          pad = pad
        )
        break
      }
      # Print item
      if (is.list(x[[i]])) {
        if (length(x[[i]]) == 0) {
          cat(paste0(
            item_format(format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            )),
            ": ",
            format_fn_rhs("(empty list)"),
            "\n"
          ))
        } else {
          cat(
            paste0(
              item_format(format(
                paste0(prefix, xnames[i]),
                width = lhs,
                justify = "right"
              )),
              ": "
            )
          )
          if (is_common_struct(x[[i]])) {
            printls(
              x[[i]],
              pad = lhs + 2,
              newline_pre = TRUE,
              print_df = print_df
            )
          } else {
            cat(
              italic("object of class:", class(x[[i]])),
              "\n"
            )
          }
        }
      } else if (is.logical(x[[i]])) {
        cat(paste0(
          item_format(format(
            paste0(prefix, xnames[i]),
            width = lhs,
            justify = "right"
          )),
          ": ",
          if (print_class) {
            gray(paste0("<", abbreviate("logical", abbrev_class_n), "> "))
          },
          ifelse(isTRUE(x[[i]]), "TRUE", "FALSE"),
          "\n"
        ))
      } else if (S7_inherits(x[[i]])) {
        cat(
          paste0(
            item_format(format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            )),
            ": "
          ),
          "\n"
        )
        # Print S7 object
        print(x[[i]], pad = lhs + 2)
      } else if (is.data.frame(x[[i]])) {
        cat(paste0(
          item_format(format(
            paste0(prefix, xnames[i]),
            width = lhs,
            justify = "right"
          )),
          ": ",
          if (print_class) {
            gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "))
          },
          headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs),
          "\n"
        ))
      } else if (isS4(x[[i]])) {
        cat(paste0(
          item_format(format(
            paste0(prefix, xnames[i]),
            width = lhs,
            justify = "right"
          )),
          ": "
        ))
        # Print S4 object
        if (print_S4) {
          cat("\n")
          print(x[[i]])
        } else {
          cat("(S4 object of class: '", class(x[[i]]), "')\n", sep = "")
        }
      } else if (!is_common_struct(x[[i]])) {
        cat(paste0(
          item_format(format(
            paste0(prefix, xnames[i]),
            width = lhs,
            justify = "right"
          )),
          ": ",
          if (print_class) {
            gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "))
          },
          italic("object of class:", class(x[[i]])),
          "\n"
        ))
      } else {
        cat(paste0(
          item_format(format(
            paste0(prefix, xnames[i]),
            width = lhs,
            justify = "right"
          )),
          ": ",
          if (print_class) {
            gray(paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "))
          },
          headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs),
          "\n"
        ))
      }
    }
  }
} # /rtemis::printls

# printdf1
# ::rtemis::
# 2016 rtemis.org
#' Print 1 x N data frame
#'
#' Pretty print a data frame containing 1 row of data with named columns as a vertical list
#'   of "       name : value"
#'      " other.name : other.value"
#'
#' @param x data frame
#' @param pad Integer: Pad output with this many spaces.
#'
#' @author EDG
#' @keywords internal
#' @noRd

printdf1 <- function(x, pad = 2) {
  x <- as.data.frame(x)
  # df <- data.frame(Parameter = c(names(x)), Value = unlist(x), row.names = NULL)

  xnames <- colnames(x)
  lhs <- max(nchar(xnames)) + pad

  for (i in seq_len(ncol(x))) {
    cat(
      paste(format(xnames[i], width = lhs, justify = "right"), ":", x[1, i]),
      "\n"
    )
  }
} # /rtemis::printdf1


#' Show data.frame
#'
#' Create a pretty text representation of a data.frame.
#'
#' @param x data frame
#' @param pad Integer: Pad output with this many spaces.
#' @param spacing Integer: Number of spaces between columns.
#' @param ddSci_dp Integer: Number of decimal places to print using [ddSci]. Default = NULL for no
#' formatting
#' @param transpose Logical: If TRUE, transpose `x` before printing.
#' @param justify Character: "right", "left".
#' @param colnames Logical: If TRUE, print column names.
#' @param rownames Logical: If TRUE, print row names.
#' @param colnames_formatter Format function for printing column names.
#' @param rownames_formatter Format function for printing row names.
#' @param newline_pre Logical: If TRUE, print a new line before printing data frame.
#' @param newline Logical: If TRUE, print a new line after printing data frame.
#'
#' @author EDG
#' @keywords internal
#' @noRd
show_df <- function(
  x,
  pad = 0L,
  spacing = 1L,
  ddSci_dp = NULL,
  transpose = FALSE,
  justify = "right",
  incl_colnames = TRUE,
  incl_rownames = TRUE,
  colnames_formatter = highlight,
  rownames_formatter = gray,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  if (transpose) {
    x <- as.data.frame(t(x))
  }
  xnames <- colnames(x)
  xrownames <- gsub(pattern = "\\.", replacement = " ", rownames(x))
  if (!is.null(ddSci_dp)) {
    xf <- as.data.frame(matrix(ddSci(x, decimal_places = ddSci_dp), NROW(x)))
    colnames(xf) <- xnames
    rownames(xf) <- xrownames
    x <- xf
  }

  col_char <- sapply(seq_along(xnames), \(i) {
    max(nchar(as.character(x[, i])), nchar(xnames[i]))
  })

  xrownames_spacing <- if (incl_rownames) {
    max(nchar(xrownames)) + pad
  } else {
    pad
  }
  spacer <- strrep(" ", spacing)

  out <- character()
  if (incl_colnames) {
    out <- paste0(
      out,
      strrep(" ", xrownames_spacing)
    )
    if (justify == "left") {
      out <- paste0(out, spacer)
    }
    for (i in seq_len(NCOL(x))) {
      out <- paste0(
        out,
        colnames_formatter(
          format(
            xnames[i],
            width = col_char[i] + spacing,
            justify = justify
          ),
          output_type = output_type
        )
      )
    }
    out <- paste0(out, "\n")
  }

  # Row names
  if (incl_rownames) {
    for (i in seq_len(NROW(x))) {
      out <- paste0(
        out,
        rownames_formatter(
          format(
            xrownames[i],
            width = xrownames_spacing,
            justify = "right"
          ),
          output_type = output_type
        )
      )
      for (j in seq_len(NCOL(x))) {
        out <- paste0(
          out,
          spacer,
          paste(format(x[i, j], width = col_char[j], justify = justify))
        )
      }
      out <- paste0(out, "\n")
    }
  } else {
    for (i in seq_len(NROW(x))) {
      for (j in seq_len(NCOL(x))) {
        out <- paste0(
          out,
          spacer,
          paste(format(x[i, j], width = col_char[j], justify = justify))
        )
      }
      out <- paste0(out, "\n")
    }
  }
  out
} # /rtemis::show_df


#' Show table
#'
#' @param x table.
#' @param spacing Integer: Number of spaces between columns.
#' @param pad Integer: Pad output with this many spaces.
#'
#' @return Character: formatted string.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
show_table <- function(
  x,
  spacing = 2L,
  pad = 2L,
  formatter = highlight,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  dim_names <- names(attr(x, "dimnames"))
  class_names <- attr(x, "dimnames")[["Reference"]]
  n_classes <- NCOL(x)
  mat <- matrix(c(x), NROW(x))
  colnames(mat) <- colnames(x)
  rownames(mat) <- rownames(x)
  # Column width without spacing
  col.width <- sapply(seq_along(class_names), \(i) {
    max(nchar(as.character(x[, i])), nchar(class_names[i]))
  })
  lhspad <- max(nchar(class_names), nchar(dim_names[1])) + spacing + pad
  # Top dimname
  formatted_dimname <- format(
    dim_names[2],
    width = lhspad + nchar(dim_names[2]),
    justify = "right"
  )
  out <- paste0(
    bold(formatted_dimname, output_type = output_type),
    "\n"
  )
  # Left dimname
  formatted_dimname1 <- format(
    dim_names[1],
    width = lhspad - spacing,
    justify = "right"
  )
  out <- paste0(
    out,
    bold(formatted_dimname1, output_type = output_type),
    strrep(" ", spacing)
  )

  # Column names
  # (Continue on same row as left dimname)
  for (i in seq_len(n_classes)) {
    formatted_classname <- format(
      class_names[i],
      width = col.width[i] + spacing,
      justify = "left"
    )
    out <- paste0(
      out,
      formatter(formatted_classname, output_type = output_type)
    )
  }
  # Add Confusion matrix excluding colnames that are already added
  out <- paste0(
    out,
    "\n",
    show_df(
      mat,
      pad = lhspad - max(nchar(class_names)) - spacing,
      incl_colnames = FALSE,
      spacing = spacing,
      colnames_formatter = formatter,
      rownames_formatter = formatter,
      output_type = output_type
    )
  )
  out
} # /rtemis::show_table

#' @keywords internal
#' @noRd
pastels <- function(x, bullet = "  -") {
  paste(paste(bullet, x, collapse = "\n"), "\n")
} # /rtemis::pastels


#' Get first few elements of a vector with ellipsis
#'
#' @details
#' Used, for example, by `repr_ls`
#'
#' @return Character.
#'
#' @keywords internal
#' @noRd
headdot <- function(x, maxlength = 6L, format_fn = identity) {
  if (maxlength == -1L || length(x) < maxlength) {
    paste(format_fn(x), collapse = ", ")
  } else {
    paste0(
      paste(format_fn(head(as.vector(x), n = maxlength)), collapse = ", "),
      "..."
    )
  }
} # /rtemis::headdot


#' Print Size
#'
#' Get `NCOL(x)` and \code{NROW{x}}
#'
#' @param x R object (usually that inherits from matrix or data.frame)
#' @param name Character: Name of input object
#' @param verbosity Integer: Verbosity level.
#' @param newline Logical: If TRUE, end with new line character.
#'
#' @return vector of NROW, NCOL invisibly
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' catsize(iris)
catsize <- function(x, name = NULL, verbosity = 1L, newline = TRUE) {
  if (inherits(x, c("matrix", "data.frame"))) {
    .nrow <- NROW(x)
    .ncol <- NCOL(x)
    nrows <- format(.nrow, big.mark = ",")
    ncols <- format(.ncol, big.mark = ",")
    if (verbosity > 0L) {
      pcat(
        name,
        paste(highlight(nrows), "x", highlight(ncols)),
        newline = newline
      )
    }
    invisible(c(.nrow, .ncol))
  } else {
    .nels <- length(x)
    nels <- format(.nels, big.mark = ",")
    if (verbosity > 0L) {
      cat(
        # "There",
        # ngettext(.nels, "is", "are"),
        name,
        highlight(nels),
        # ngettext(.nels, "element", "elements"),
        if (newline) "\n"
      )
    }
    invisible(.nels)
  }
} # /rtemis::catsize


#' @author EDG
#' @keywords internal
#' @noRd
list2text <- function(x, sep = ": ", line = "\n") {
  .names <- names(x)
  sapply(seq_along(x), \(i) {
    paste0(.names[i], sep, x[[i]], line)
  }) |>
    paste0(collapse = "")
} # /rtemis::list2text


#' List to HTML
#'
#' @author EDG
#' @keywords internal
#' @noRd
list2html <- function(
  x,
  sep = ": ",
  col = "#16A0AC",
  key_weight = 100,
  value_weight = 300,
  line = "<br>"
) {
  .names <- names(x)
  sapply(seq_along(x), \(i) {
    paste0(
      span(.names[i], style = paste0("font-weight:", key_weight, ";")),
      sep,
      span(
        x[[i]],
        style = paste0("color:", col, "; font-weight:", value_weight, ";")
      ),
      line
    )
  }) |>
    paste0(collapse = "") |>
    htmltools::HTML()
} # /rtemis::list2html


#' Helper function to build padded string equivalent of padcat
#'
#' @param text Character: Text to pad.
#' @param pad Integer: Number of spaces to pad.
#' @param newline_pre Logical: If TRUE, add newline before text.
#' @param newline Logical: If TRUE, add newline after text.
#'
#' @return Character: Padded string.
#'
#' @author EDG
#' @keywords internal
#' @noRd
show_padded <- function(
  text,
  pad = 2L,
  newline_pre = FALSE,
  newline = FALSE
) {
  result <- ""
  if (newline_pre) {
    result <- paste0(result, "\n")
  }
  result <- paste0(result, strrep(" ", pad))
  result <- paste0(result, text)
  if (newline) {
    result <- paste0(result, "\n")
  }
  result
}


#' Show list as formatted string
#'
#' Works exactly like printls, but instead of printing to console with cat,
#' it outputs a single string, formatted using mformat, so that cat(repr_ls(x))
#' looks identical to printls(x) for any list x
#'
#' @param x list or object that will be converted to a list.
#' @param prefix Character: Optional prefix for names.
#' @param pad Integer: Pad output with this many spaces.
#' @param item_format Formatting function for items.
#' @param maxlength Integer: Maximum length of items to show using `headdot()` before truncating with ellipsis.
#' @param center_title Logical: If TRUE, autopad title for centering, if present.
#' @param title Character: Title to print before list.
#' @param title_newline Logical: If TRUE, print title on new line.
#' @param newline_pre Logical: If TRUE, print newline before list.
#' @param format_fn_rhs Formatting function for right-hand side of items.
#' @param print_class Logical: If TRUE, print abbreviated class of object.
#' @param abbrev_class_n Integer: Number of characters to abbreviate class names to.
#' @param print_df Logical: If TRUE, print data frame contents, otherwise print n rows and columns.
#' @param print_S4 Logical: If TRUE, print S4 object contents, otherwise print class name.
#' @param limit Integer: Maximum number of items to show.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted string that can be printed with cat()
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
repr_ls <- function(
  x,
  prefix = "",
  pad = 2L,
  item_format = bold,
  maxlength = 4L,
  center_title = TRUE,
  title = NULL,
  title_newline = TRUE,
  newline_pre = FALSE,
  format_fn_rhs = ddSci,
  print_class = TRUE,
  abbrev_class_n = 3L,
  print_df = FALSE,
  print_S4 = FALSE,
  limit = 12L,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  # Initialize output string
  result <- ""

  # Arguments ----
  if (newline_pre) {
    result <- paste0(result, "\n")
  }

  if (is.null(x)) {
    if (!is.null(title)) {
      result <- paste0(
        result,
        show_padded(title, pad = pad, newline = title_newline)
      )
    }
    result <- paste0(result, strrep(" ", pad), "NULL")
  } else if (length(x) == 0) {
    result <- paste0(result, class(x)[1], " of length 0.\n")
  } else if (is.data.frame(x) && !print_df) {
    result <- paste0(
      result,
      show_padded("(data.frame with ", pad = pad),
      NROW(x),
      " rows and ",
      NCOL(x),
      " columns.)\n"
    )
  } else if (!is_common_struct(x)) {
    result <- paste0(
      result,
      "object of class: ",
      paste(class(x), collapse = ", "),
      "\n"
    )
  } else {
    x <- as.list(x)
    # Get class of each element
    classes_ <- sapply(x, function(el) class(el)[[1L]])
    # Deparse closures that would cause error
    is_fn <- which(sapply(x, is.function))
    if (length(is_fn) > 0) {
      for (i in is_fn) {
        x[[i]] <- paste0(as.character(head(deparse(x[[i]]), n = 1L)), "...")
      }
    }
    # Set NULLs to "NULL"
    null_index <- sapply(x, is.null)
    x[null_index] <- "NULL"
    xnames <- names(x)
    lhs <- max(nchar(paste0(prefix, xnames))) + pad

    if (!is.null(title)) {
      title_pad <- if (center_title) {
        max(0, lhs - round((.5 * nchar(title))) - 3)
      } else {
        0
      }
      result <- paste0(
        result,
        show_padded(title, pad = title_pad, newline = title_newline)
      )
    } # /title

    counter <- 0L
    # Print each item up to limit items
    if (limit != -1L && length(x) > limit) {
      limit_text <- paste0(
        italic(
          gray(
            paste0(
              "Showing first ",
              limit,
              " of ",
              length(x),
              " items.\n"
            ),
            output_type = output_type
          ),
          output_type = output_type
        )
      )
      result <- paste0(result, show_padded(limit_text, pad = pad))
    }

    for (i in seq_along(x)) {
      counter <- counter + 1L
      if (limit != -1L && counter > limit) {
        more_text <- paste0(
          italic(
            gray(
              paste0(
                "...",
                length(x) - limit,
                " more items not shown.\n"
              )
            ),
            output_type = output_type
          )
        )
        result <- paste0(result, show_padded(more_text, pad = pad))
        break
      }

      # Print item
      if (is.list(x[[i]])) {
        if (length(x[[i]]) == 0) {
          item_text <- paste0(
            item_format(
              format(
                paste0(prefix, xnames[i]),
                width = lhs,
                justify = "right"
              ),
              output_type = output_type
            ),
            ": ",
            format_fn_rhs("(empty list)"),
            "\n"
          )
          result <- paste0(result, item_text)
        } else {
          item_text <- paste0(
            item_format(
              format(
                paste0(prefix, xnames[i]),
                width = lhs,
                justify = "right"
              ),
              output_type = output_type
            ),
            ": "
          )
          result <- paste0(result, item_text)

          if (is_common_struct(x[[i]])) {
            sub_result <- repr_ls(
              x[[i]],
              pad = lhs + 2,
              item_format = item_format,
              newline_pre = TRUE, # important
              format_fn_rhs = format_fn_rhs,
              print_class = print_class,
              limit = limit,
              output_type = output_type
            )
            result <- paste0(result, sub_result)
          } else {
            result <- paste0(
              result,
              italic(
                paste(
                  "object of class:",
                  paste(class(x[[i]]), collapse = ", ")
                ),
                output_type = output_type
              ),
              "\n"
            )
          }
        }
      } else if (is.logical(x[[i]])) {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ": ",
          if (print_class) {
            gray(
              paste0("<", abbreviate("logical", abbrev_class_n), "> "),
              output_type = output_type
            )
          } else {
            ""
          },
          ifelse(isTRUE(x[[i]]), "TRUE", "FALSE"),
          "\n"
        )
        result <- paste0(result, item_text)
      } else if (S7_inherits(x[[i]])) {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ":\n" # S7 show begin on next line, otherwise must have different pad for first line (S7name) and for rest
        )
        result <- paste0(result, item_text)
        # Show S7 object: repr() must return a character string of length 1
        s7_output <- tryCatch(
          {
            repr(x[[i]], pad = lhs + 2, output_type = output_type)
          },
          error = function(e) {
            paste0(
              "(S7 object of class: '",
              paste(class(x[[i]]), collapse = ", "),
              "')\n"
            )
          }
        )
        result <- paste0(result, s7_output)
      } else if (is.data.frame(x[[i]])) {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ": ",
          if (print_class) {
            gray(
              paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "),
              output_type = output_type
            )
          } else {
            ""
          },
          headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs),
          "\n"
        )
        result <- paste0(result, item_text)
      } else if (isS4(x[[i]])) {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ": "
        )
        result <- paste0(result, item_text)

        # Print S4 object
        if (print_S4) {
          result <- paste0(result, "\n")
          # For S4 objects, we would need to capture their print output
          # This is complex, so for now we'll just show the class
          result <- paste0(
            result,
            "(S4 object of class: '",
            paste(class(x[[i]]), collapse = ", "),
            "')\n"
          )
        } else {
          result <- paste0(
            result,
            "(S4 object of class: '",
            paste(class(x[[i]]), collapse = ", "),
            "')\n"
          )
        }
      } else if (!is_common_struct(x[[i]])) {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ": ",
          if (print_class) {
            gray(
              paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "),
              output_type = output_type
            )
          } else {
            ""
          },
          italic(
            paste(
              "object of class:",
              paste(class(x[[i]]), collapse = ", ")
            ),
            output_type = output_type
          ),
          "\n"
        )
        result <- paste0(result, item_text)
      } else {
        item_text <- paste0(
          item_format(
            format(
              paste0(prefix, xnames[i]),
              width = lhs,
              justify = "right"
            ),
            output_type = output_type
          ),
          ": ",
          if (print_class) {
            gray(
              paste0("<", abbreviate(classes_[[i]], abbrev_class_n), "> "),
              output_type = output_type
            )
          } else {
            ""
          },
          headdot(x[[i]], maxlength = maxlength, format_fn = format_fn_rhs),
          "\n"
        )
        result <- paste0(result, item_text)
      }
    }
  }

  result
} # /rtemis::repr_ls


# %% inspect.class_data.frame ----
method(inspect, class_data.frame) <- function(x) {
  out <- paste0(
    fmt("<", col = "#808080"),
    fmt(class(x)[[1L]], col = highlight_col, bold = TRUE),
    fmt(">", col = "#808080"),
    " ",
    fmt(NROW(x), bold = TRUE),
    fmt(" x ", col = "#808080"),
    fmt(NCOL(x), bold = TRUE),
    "\n",
    repr_ls(x, pad = 0L, print_class = TRUE, print_df = TRUE)
  )
  cat(out)
  invisible(out)
} # /rtemis::inspect.class_data.frame


================================================
FILE: R/utils_rt.R
================================================
# info
# ::rtemis::
# 2016- EDG rtemis.org

#' `rtemis-internals`: `intro`
#'
#' Intro
#'
#' Starts function execution timer and opens log file.
#' Pairs with `outro`.
#'
#' @keywords internal
#' @noRd
intro <- function(
  .message = "\u25b6",
  logfile = NULL,
  call_depth = 1,
  caller = NULL,
  newline_pre = FALSE,
  use_sink = FALSE,
  verbosity = 1L
) {
  if (!is.null(logfile)) {
    logfile <- normalizePath(logfile, mustWork = FALSE)
    outdir <- dirname(logfile)
    if (!dir.exists(outdir)) {
      dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
    }
    if (use_sink) {
      sink(logfile, append = TRUE, split = verbosity > 0L)
    }
    log_to_file("Started.", logfile = logfile)
  }
  start_time <- proc.time()
  if (verbosity > 0L || !is.null(logfile)) {
    if (newline_pre) {
      cat("\n")
    }
    msg(
      .message,
      call_depth = call_depth,
      sep = "",
      caller_id = 2,
      caller = caller
    )
  }
  invisible(start_time)
} # /rtemis::intro


# Function to output seconds if seconds < 60, otherwise output minutes
#' @keywords internal
#' @noRd
format_seconds <- function(seconds) {
  if (seconds < 60) {
    paste0(bold(ddSci(seconds)), " seconds")
  } else {
    paste0(bold(ddSci(round(seconds / 60))), " minutes")
  }
}


#' `rtemis-internals`: `outro`
#'
#' Outro
#'
#' Stops function execution timer and closes log file.
#'
#' Second part to `intro`
#'
#' @keywords internal
#' @noRd
outro <- function(
  start_time,
  message = NULL,
  sink_off = FALSE,
  logfile = NULL,
  newline_pre = FALSE,
  real_user_system = FALSE,
  verbosity = 1L
) {
  elapsed <- as.numeric(proc.time() - start_time)
  if (verbosity > 0L || sink_off) {
    if (newline_pre) {
      cat("\n")
    }
    if (real_user_system) {
      msg0(
        paste0(
          checkmark(),
          " Done in ",
          format_seconds(elapsed[3]),
          " (",
          "Real:",
          ddSci(elapsed[3]),
          "/User:",
          ddSci(elapsed[1]),
          "/System:",
          ddSci(elapsed[2]),
          ")."
        ),
        caller_id = 2
      )
    } else {
      msg0(
        paste0(
          checkmark(),
          " Done in ",
          format_seconds(elapsed[3]),
          "."
        ),
        caller_id = 2
      )
    }
  }

  if (sink_off) {
    sink()
  }
  if (!is.null(logfile)) {
    log_to_file("Done.", logfile = logfile)
  }
  invisible(elapsed)
} # /rtemis::outro


#' Summarize supervised inputs
#'
#' @param x tabular data: Training set data.
#' @param dat_validation data.frame or similar: Validation set data.
#' @param dat_test data.frame or similar: Test set data.
#'
#' @author EDG
#' @keywords internal
#' @noRd
summarize_supervised <- function(
  x,
  dat_validation = NULL,
  dat_test = NULL
) {
  # msg("Input data summary:")
  msg0(
    if (!is.null(dat_validation)) "  ",
    "Training set: ",
    highlight(NROW(x)),
    " cases x ",
    highlight(NCOL(x) - 1),
    " features."
  )
  if (!is.null(dat_validation)) {
    msg0(
      "Validation set: ",
      highlight(NROW(dat_validation)),
      " cases x ",
      highlight(NCOL(dat_validation) - 1),
      " features."
    )
  }
  if (!is.null(dat_test)) {
    msg0(
      if (!is.null(dat_validation)) "  ",
      "    Test set: ",
      highlight(NROW(dat_test)),
      " cases x ",
      highlight(NCOL(dat_test) - 1),
      " features."
    )
  }
} # /rtemis::summarize_supervised


#' Summarize unsupervised inputs
#'
#' @param x tabular data: Training set data.
#'
#' @author EDG
#' @keywords internal
#' @noRd
summarize_unsupervised <- function(x) {
  msg(
    "Input:",
    highlight(NROW(x)),
    "cases x",
    highlight(NCOL(x)),
    "features."
  )
} # /rtemis::summarize_unsupervised


#' Log to file
#'
#' @param x Character: Message to log.
#' @param logfile Character: Path to log file.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
log_to_file <- function(x, logfile) {
  cat(
    paste0(
      datetime(),
      " ",
      x,
      "\n"
    ),
    file = logfile,
    append = TRUE
  )
} # /rtemis::log_to_file


================================================
FILE: R/utils_rules.R
================================================
# utils_rules.R
# ::rtemis::
# EDG rtemis.org

#' Match Rules to Cases
#'
#' @param x Matrix / data frame: Input features
#' @param rules Character vector: Rules
#' @param verbosity Integer: Verbosity level.
#'
#' @return cases-by-rules matrix (binary; 1: match, 0: no match)
#' @author EDG
#'
#' @keywords internal
#' @noRd
match_cases_by_rules <- function(x, rules, prefix = "Rule_", verbosity = 1L) {
  n_cases <- NROW(x)
  n_rules <- length(rules)
  if (!is.data.table(x)) {
    # {data.table}
    x <- data.table::as.data.table(x)
  } else {
    # Either make copy, or set ID to NULL before exit
    # x <- copy(x)
    on.exit(x[, ID := NULL])
  }
  # appease R CMD check
  ID <- NULL
  x[, ID := seq_len(n_cases)]
  cxr <- matrix(0, n_cases, n_rules)
  if (verbosity > 0L) {
    msgstart(
      "Matching",
      highlight(n_rules),
      "rules to",
      highlight(n_cases),
      "cases..."
    )
  }
  for (i in seq_along(rules)) {
    match <- x[eval(parse(text = rules[i])), ID]
    cxr[match, i] <- 1
  }
  if (!is.null(prefix)) {
    colnames(cxr) <- paste0(prefix, seq_len(n_rules))
  }
  if (verbosity > 0L) {
    msgdone()
  }
  cxr
} # /rtemis::match_cases_by_rules


#' Index cases by rules
#'
#' Get an index of which cases match which rule - meant for cases where each case matches one rule
#' and one rule only
#'
#' @inheritParams match_cases_by_rules
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
index_cases_by_rules <- function(x, rules, verbosity = 1L) {
  cxr <- match_cases_by_rules(x, rules, verbosity)
  apply(cxr, 1, \(i) which(i == 1))
}


#' Prune a rule to a maximum length
#'
#' @param rule Character: A rule.
#' @param max_length Integer: The maximum number of conditions to keep.
#' @param sep Character: The separator between conditions.
#'
#' @return Character: The pruned rule.
#' @author EDG
#'
#' @keywords internal
#' @noRd
simple_prune_ <- function(rule, max_length, sep = " & ") {
  conditions <- strsplit(rule, sep)[[1]]
  if (length(conditions) > max_length) {
    conditions <- conditions[1:max_length]
    paste(conditions, collapse = sep)
  } else {
    rule
  }
} # /rtemis::simple_prune_


#' Prune rules to a maximum length
#'
#' @param rule Character vector: Rules.
#' @param max_length Integer: The maximum number of conditions to keep.
#' @param sep Character: The separator between conditions.
#'
#' @return Character: The pruned rule.
#'
#' @author EDG
#'
#' @keywords internal
#' @noRd
simple_prune <- function(rules, max_length, sep = " & ") {
  rules <- sapply(
    rules,
    simple_prune_,
    max_length = max_length,
    sep = sep,
    USE.NAMES = FALSE
  )
  rules
} # /rtemis::simple_prune


#' Extract variable names from rules
#'
#' @param rules Character vector: Rules.
#' @param unique Logical: If TRUE, return only unique variables.
#'
#' @return Character vector: Variable names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
get_vars_from_rules <- function(rules, unique = FALSE) {
  # Extract variables from rules
  vars <- unique(unlist(strsplit(rules, " & ")))
  # Get string up to first "<", ">", "=", "!", or "%in%"
  vars <- gsub("(<|>|=|!|%in%).*", "", vars)
  vars <- gsub(" .*", "", vars)
  if (unique) {
    vars <- unique(vars)
  }
  vars
} # /rtemis::get_vars_from_rules

#' Format rules
#'
#' Converts R-executable logical expressions to a more human-friendly format
#'
#' @param x Vector, string: Logical expressions
#' @param space_after_comma Logical: If TRUE, place spaces after commas.
#' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many
#' decimal places
#'
#' @return Character vector: Formatted rules.
#'
#' @author EDG
#' @keywords internal
#' @noRd
format_rules <- function(x, space_after_comma = FALSE, decimal_places = NULL) {
  x <- gsub("[&+]", "AND", x)
  x <- gsub(">", " > ", x)
  x <- gsub("<=", " <= ", x)
  x <- gsub("%in%", "IN", x)
  x <- gsub("c\\(", "{", x)
  x <- gsub("\\)", "}", x)
  x <- gsub("'", "", x)
  if (space_after_comma) {
    x <- gsub(",", ", ", x)
  }
  if (!is.null(decimal_places)) {
    x <- gsubfn::gsubfn(
      "([0-9.]+[0-9])",
      function(i) ddSci(i, decimal_places = decimal_places),
      x,
      engine = "R"
    )
  }
  x
} # /rtemis::format_rules


#' Format LightRuleFit rules
#'
#' Converts R-executable logical expressions to a more human-friendly format
#'
#' @param x Vector, string: Logical expressions
#' @param space_after_comma Logical: If TRUE, place spaces after commas.
#' @param decimal_places Integer: Limit all floats (numbers of the form 9.9) to this many
#' decimal places
#'
#' @return Character vector: Formatted rules.
#'
#' @author EDG
#' @keywords internal
#' @noRd
format_LightRuleFit_rules <- function(
  x,
  space_after_comma = FALSE,
  decimal_places = NULL
) {
  x <- gsub("[&+]", "AND", x)
  x <- gsub(">", " > ", x)
  x <- gsub("<=", " <= ", x)
  x <- gsub("%in%", "IN", x)
  x <- gsub("%notin%", "NOT IN", x)
  x <- gsub("c\\(", "{", x)
  x <- gsub("\\)", "}", x)
  x <- gsub("'", "", x)
  if (space_after_comma) {
    x <- gsub(",", ", ", x)
  }
  if (!is.null(decimal_places)) {
    x <- gsubfn::gsubfn(
      "([0-9.]+[0-9])",
      function(i) ddSci(i, decimal_places = decimal_places),
      x,
      engine = "R"
    )
  }
  gsub("  ", " ", x)
} # /rtemis::format_LightRuleFit_rules


# rules2medmod
# ::rtemis::
# 2018 EDG rtemis.org

#' Convert rules from cutoffs to median/mode and range
#'
#' Convert rules from cutoffs to `median (range)` and `mode (range)` format
#'
#' @param rules Character, vector: Input rules
#' @param x Data frame: Data to evaluate rules
#' @param .ddSci Logical: If TRUE, format all continuous variables using
#' [ddSci], which will give either 2 decimal places, or scientific
#' notation if two decimal places result in 0.00
#' @param verbosity Integer: Verbosity level.
#'
#' @return Character vector.
#'
#' @author EDG
#' @keywords internal
#' @noRd
rules2medmod <- function(rules, x, .ddSci = TRUE, verbosity = 1L) {
  cxr <- match_cases_by_rules(x, rules, verbosity = verbosity)
  nrules <- length(rules)
  rules_f <- vector("character", nrules)
  frmt <- if (.ddSci) ddSci else I
  if (verbosity > 0L) {
    msg("Converting rules...")
  }
  for (i in seq(rules)) {
    if (verbosity > 1L) {
      msg_info("#", i, "/", nrules, "...")
    }
    dat <- x[cxr[, i] == 1, ]
    sub <- strsplit(rules[i], "&")[[1]]
    rule <- character()
    for (j in seq(sub)) {
      categorical <- grepl("%in%", sub[j])
      if (categorical) {
        var <- gsub("\\s", "", strsplit(sub[j], "%in%")[[1]][1])
        vals <- dat[[var]]
        value <- paste0(
          get_mode(vals),
          " (",
          paste(levels(droplevels(vals)), collapse = ", "),
          ")"
        )
        rule[j] <- paste0(var, " = ", value)
      } else {
        sub[j] <- gsub(">|>=|<|<=", "@", sub[j])
        var <- gsub("\\s", "", strsplit(sub[j], "@")[[1]][1])
        vals <- dat[[var]]
        value <- paste0(
          frmt(median(vals)),
          " (",
          frmt(min(vals)),
          ":",
          frmt(max(vals)),
          ")"
        )
        rule[j] <- paste0(var, " = ", value)
      }
    } # /loop through each rule's conditions
    # This consolidates conditions like a > 3 & a > 5 to one
    rules_f[i] <- paste(unique(rule), collapse = " & ")
  } # /loop through rules

  if (verbosity > 0L) {
    msg("Done")
  }
  rules_f
} # /rtemis::rules2medmod


================================================
FILE: R/utils_strings.R
================================================
# strng.R
# ::rtemis::
# 2022 EDG rtemis.org

# General hilite function output bold + any color.
hilite <- function(
  ...,
  col = highlight_col,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)
  if (output_type == "ansi") {
    paste0("\033[1;38;5;", col, "m", paste(...), "\033[0m")
  } else if (output_type == "html") {
    paste0(
      "<span style='color: #",
      col,
      "; font-weight: bold;'>",
      paste(...),
      "</span>"
    )
  } else {
    paste0(...)
  }
} # /rtemis::hilite


#' @param x Numeric: Input
#'
#' @keywords internal
#' @noRd
highlightbig <- function(x, output_type = c("ansi", "html", "plain")) {
  highlight(
    format(x, scientific = FALSE, big.mark = ","),
    output_type = output_type
  )
}


#' Red
#'
#' @author EDG
#' @keywords internal
#' @noRd
red <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) {
  fmt(
    paste(...),
    col = rt_red,
    bold = bold,
    output_type = output_type
  )
}


#' Green
#'
#' @author EDG
#' @keywords internal
#' @noRd
green <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) {
  fmt(
    paste(...),
    col = rt_green,
    bold = bold,
    output_type = output_type
  )
}


#' Blue
#'
#' @author EDG
#' @keywords internal
#' @noRd
blue <- function(..., bold = FALSE, output_type = c("ansi", "html", "plain")) {
  fmt(
    paste(...),
    col = rt_blue,
    bold = bold,
    output_type = output_type
  )
}


#' Orange
#'
#' @author EDG
#' @keywords internal
#' @noRd
orange <- function(
  ...,
  bold = FALSE,
  output_type = c("ansi", "html", "plain")
) {
  fmt(
    paste(...),
    col = rt_orange,
    bold = bold,
    output_type = output_type
  )
}


#' Reset ANSI formatting
#'
#' @param ... Optional character: Text to be output to console.
#'
#' @return Character: Text with ANSI reset code prepended.
#'
#' @author EDG
#' @keywords internal
#' @noRd
reset <- function(...) {
  paste0("\033[0m", paste(...))
}


#' Get rtemis citation
#'
#' @return Character: Citation command.
#'
#' @author EDG
#' @keywords internal
#' @noRd
rtcitation <- paste0(
  "> ",
  fmt("citation", col = rt_blue),
  "(",
  fmt("rtemis", col = rt_teal),
  ")"
)


checkmark <- function(
  col = rt_green,
  output_type = c("ansi", "html", "plain")
) {
  fmt("\u2713", col = col, bold = TRUE, output_type = output_type)
}

crossmark <- function(output_type = c("ansi", "html", "plain")) {
  fmt("\u2715", col = rt_red, bold = TRUE, output_type = output_type)
}


#' Success message
#'
#' @param ... Character: Message components.
#' @param sep Character: Separator between message components.
#' @param end Character: End character.
#' @param pad Integer: Number of spaces to pad the message with.
#'
#' @author EDG
#' @keywords internal
#' @noRd
yay <- function(..., sep = " ", end = "\n", pad = 0) {
  message(
    strrep(" ", pad),
    paste(checkmark(), ..., sep = sep),
    end,
    appendLF = FALSE
  )
} # /rtemis::yay


#' Failure message
#'
#' @param ... Character: Message components.
#' @param sep Character: Separator between message components.
#' @param end Character: End character.
#' @param pad Integer: Number of spaces to pad the message with.
#'
#' @author EDG
#' @keywords internal
#' @noRd
nay <- function(..., sep = " ", end = "\n", pad = 0) {
  message(
    strrep(" ", pad),
    paste(crossmark(), ..., sep = sep),
    end,
    appendLF = FALSE
  )
} # /rtemis::nay


#' Format text for label printing
#'
#' @param x Character: Input
#' @param underscores_to_spaces Logical: If TRUE, convert underscores to spaces.
#' @param dotsToSpaces Logical: If TRUE, convert dots to spaces.
#' @param toLower Logical: If TRUE, convert to lowercase (precedes `toTitleCase`).
#' Default = FALSE (Good for getting all-caps words converted to title case, bad for abbreviations
#' you want to keep all-caps)
#' @param toTitleCase Logical: If TRUE, convert to Title Case. Default = TRUE (This does not change
#' all-caps words, set `toLower` to TRUE if desired)
#' @param capitalize_strings Character, vector: Always capitalize these strings, if present. Default = `"id"`
#' @param stringsToSpaces Character, vector: Replace these strings with spaces. Escape as needed for `gsub`.
#' Default = `"\\$"`, which formats common input of the type `data.frame$variable`
#'
#' @return Character vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- c("county_name", "total.cost$", "age", "weight.kg")
#' labelify(x)
labelify <- function(
  x,
  underscores_to_spaces = TRUE,
  dotsToSpaces = TRUE,
  toLower = FALSE,
  toTitleCase = TRUE,
  capitalize_strings = c("id"),
  stringsToSpaces = c("\\$", "`")
) {
  if (is.null(x)) {
    return(NULL)
  }
  xf <- x
  for (i in stringsToSpaces) {
    xf <- gsub(i, " ", xf)
  }
  for (i in capitalize_strings) {
    xf <- gsub(paste0("^", i, "$"), toupper(i), xf, ignore.case = TRUE)
  }
  if (underscores_to_spaces) {
    xf <- gsub("_", " ", xf)
  }
  if (dotsToSpaces) {
    xf <- gsub("\\.", " ", xf)
  }
  if (toLower) {
    xf <- tolower(xf)
  }
  if (toTitleCase) {
    xf <- tools::toTitleCase(xf)
  }
  xf <- gsub(" {2,}", " ", xf)
  xf <- gsub(" $", "", xf)

  # Remove [[X]], where X is any length of characters or numbers
  gsub("\\[\\[.*\\]\\]", "", xf)
} # /rtemis::labelify


#' Clean names
#'
#' Clean character vector by replacing all symbols and sequences of symbols with single
#' underscores, ensuring no name begins or ends with a symbol
#'
#' @param x Character vector.
#' @param sep Character: Separator to replace symbols with.
#' @param prefix_digits Character: prefix to add to names beginning with a
#' digit. Set to NA to skip.
#'
#' @return Character vector.
#'
#' @author EDG
#' @export
#'
#' @examples
#' x <- c("Patient ID", "_Date-of-Birth", "SBP (mmHg)")
#' x
#' clean_names(x)
#' clean_names(x, sep = " ")
clean_names <- function(x, sep = "_", prefix_digits = "V_") {
  xc <- gsub("[^[:alnum:]]{1,}", sep, x)
  xc <- gsub(paste0("^", sep, "+|", sep, "+$"), "", xc)
  if (!is.na(prefix_digits)) {
    sn_idi <- grep("^[0-9]", xc)
    xc[sn_idi] <- paste0(prefix_digits, xc[sn_idi])
  }
  xc
} # /rtemis::clean_names


#' Clean column names
#'
#' Clean column names by replacing all spaces and punctuation with a single underscore
#'
#' @param x Character vector OR any object with `colnames()` method, like matrix, data.frame,
#' data.table, tibble, etc.
#' @param lowercase Logical: If TRUE, convert to lowercase.
#' @param uppercase Logical: If TRUE, convert to uppercase.
#' @param titlecase Logical: If TRUE, convert to Title Case.
#'
#' @return Character vector with cleaned names.
#'
#' @author EDG
#' @export
#'
#' @examples
#' clean_colnames(iris, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE)
clean_colnames <- function(
  x,
  lowercase = FALSE,
  uppercase = FALSE,
  titlecase = FALSE
) {
  # Check arguments: only one of lowercase, uppercase, or titlecase can be TRUE
  if (sum(c(lowercase, uppercase, titlecase)) > 1) {
    cli::cli_abort(
      "Only one of {.arg lowercase}, {.arg uppercase}, or {.arg titlecase} can be TRUE."
    )
  }
  if (!inherits(x, "character")) {
    x <- colnames(x)
  }
  if (lowercase) {
    clean_names(tolower(x))
  } else if (uppercase) {
    clean_names(toupper(x))
  } else if (titlecase) {
    gsub(" ", "_", tools::toTitleCase(clean_names(x, sep = " ")))
  } else {
    clean_names(x)
  }
} # /rtemis::clean_colnames


#' Force plain text when using `message()`
#'
#' @param x Character: Text to be output to console.
#'
#' @return Character: Text with ANSI escape codes removed.
#'
#' @author EDG
#' @keywords internal
#' @noRd
plain <- function(x) {
  paste0("\033[0m", x)
}


#' Oxford comma
#'
#' @param ... Character vector: Items to be combined.
#' @param format_fn Function: Any function to be applied to each item.
#'
#' @return Character: Formatted string with oxford comma.
#'
#' @author EDG
#' @keywords internal
#' @noRd
oxfordcomma <- function(..., format_fn = identity) {
  x <- unlist(list(...))
  if (length(x) > 2) {
    paste0(
      paste(sapply(x[-length(x)], format_fn), collapse = ", "),
      ", and ",
      format_fn(x[length(x)])
    )
  } else if (length(x) == 2) {
    paste(format_fn(x), collapse = " and ")
  } else {
    format_fn(x)
  }
} # /rtemis::oxfordcomma


#' Show S7 class name
#'
#' @param x Character: S7 class name.
#' @param col Color: Color code for the object name.
#' @param pad Integer: Number of spaces to pad the message with.
#' @param prefix Character: Prefix to add to the object name.
#' @param output_type Character {"ansi", "html", or "plain"}: Output type.
#'
#' @return Character: Formatted string that can be printed with cat().
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' repr_S7name("Supervised") |> cat()
repr_S7name <- function(
  x,
  col = col_object,
  bold = TRUE,
  underline = FALSE,
  pad = 0L,
  prefix = NULL,
  output_type = NULL
) {
  output_type <- get_output_type(output_type)

  if (S7_inherits(x)) {
    x <- S7_class(x)@name
  }

  paste0(
    strrep(" ", pad),
    fmt("<", col = highlight_col, output_type = output_type),
    if (!is.null(prefix)) {
      fmt(
        prefix,
        col = col_object,
        bold = bold,
        underline = underline,
        output_type = output_type
      )
    },
    fmt(
      x,
      col = col,
      bold = bold,
      underline = underline,
      output_type = output_type
    ),
    fmt(">", col = highlight_col, output_type = output_type),
    "\n"
  )
} # /rtemis::repr_S7name


#' Cat object
#'
#' @param x Character: Object description
#' @param col Character: Color code for the object name
#' @param pad Integer: Number of spaces to pad the message with.
#' @param verbosity Integer: Verbosity level. If > 1, adds package name to the output.
#' @param type Character: Output type ("ansi", "html", "plain")
#'
#' @return NULL: Prints the formatted object description to the console.
#'
#' @author EDG
#' @keywords internal
#' @noRd
objcat <- function(
  x,
  col = col_object,
  pad = 0L,
  prefix = NULL,
  output_type = c("ansi", "html", "plain")
) {
  output_type <- match.arg(output_type)

  out <- repr_S7name(
    x,
    col = col,
    pad = pad,
    prefix = prefix,
    output_type = output_type
  )
  cat(out)
} # /rtemis::objcat


#' Function to label
#'
#' Create axis label from function definition and variable name
#'
#' @param fn Function.
#' @param varname Character: Variable name.
#'
#' @return Character: Label.
#'
#' @author EDG
#' @keywords internal
#' @noRd
fn2label <- function(fn, varname) {
  # Get function body
  fn_body <- deparse(fn)[2]
  # Replace "x" with variable name
  sub("\\(x\\)", paste0("(", varname, ")"), fn_body)
} # /rtemis::fn2label


#' Padded cat
#'
#' @param x Character: Text to be output to console.
#' @param format_fn Function: Any function to be applied to `x`.
#' @param col Color: Any color fn.
#' @param newline_pre Logical: If TRUE, start with a new line.
#' @param newline Logical: If TRUE, end with a new (empty) line.
#' @param pad Integer: Pad message with this many spaces on the left.
#'
#' @author EDG
#' @keywords internal
#' @noRd
padcat <- function(
  x,
  format_fn = I,
  col = NULL,
  newline_pre = FALSE,
  newline = FALSE,
  pad = 2L
) {
  x <- as.character(x)
  if (!is.null(format_fn)) {
    x <- format_fn(x)
  }
  if (newline_pre) {
    cat("\n")
  }
  cat(strrep(" ", pad))
  if (!is.null(col)) {
    cat(col(x, TRUE))
  } else {
    cat(bold(x))
  }
  if (newline) {
    cat("\n")
  }
} # /rtemis::padcat


#' Pad string to target length
#'
#' @param x Character: String to pad.
#' @param target Integer: Target length.
#' @param char Character: Padding character.
#'
#' @return Character: Padded string.
#'
#' @author EDG
#' @keywords internal
#' @noRd
pad_string <- function(x, target = 17L, char = " ") {
  lpad <- max(0, target - max(0, nchar(x)))
  paste0(
    strrep(char, lpad),
    x
  )
} # /rtemis::pad_string


#' Pad left string to target length and print with right string
#'
#' @return Called for side effect: prints padded left string and right string.
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' {
#'   pcat("super", "wow")
#'   pcat(NULL, "oooo")
#' }
pcat <- function(left, right, target = 17, newline = TRUE) {
  cat(pad_string(left, target = target), right)
  if (newline) cat("\n")
}


#' Paste tables
#'
#' Collapses the contents of two tables element-wise with a separator
#' Table names are kept if same, otherwise also collapsed with separator
#'
#' @param left table: Left table.
#' @param right table: Right table.
#' @param sep Character: Separator between tables' values.
#'
#' @return table: Table with collapsed values and names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
paste_tables <- function(left, right, sep = "=>") {
  # Check inputs are tables
  if (!inherits(left, "table") || !inherits(right, "table")) {
    cli::cli_abort(c(
      "x" = "Both {.arg left} and {.arg right} must be {.cls table} objects.",
      "i" = "Got: {.cls {class(left)}} and {.cls {class(right)}}."
    ))
  }

  # Check dimensions match
  if (!identical(dim(left), dim(right))) {
    cli::cli_abort(c(
      "x" = "Tables must have matching dimensions.",
      "i" = "Got dimensions: {dim(left)} and {dim(right)}."
    ))
  }

  # Paste values element-wise
  values <- paste(as.vector(left), as.vector(right), sep = sep)

  # Handle dimnames
  left_names <- dimnames(left)
  right_names <- dimnames(right)

  if (identical(left_names, right_names)) {
    # Keep names if identical
    result_names <- left_names
  } else {
    # Paste names element-wise if different
    result_names <- mapply(
      function(l, r) {
        if (identical(l, r)) l else paste(l, r, sep = sep)
      },
      left_names,
      right_names,
      SIMPLIFY = FALSE
    )
  }

  # Create result table
  result <- array(values, dim = dim(left), dimnames = result_names)
  class(result) <- "table"
  result
} # /rtemis::paste_tables


#' Paste data frames
#'
#' Collapses the contents of two data frames element-wise with a separator
#' Column names and row names are kept if same, otherwise also collapsed with separator
#'
#' @param left data.frame: Left data frame.
#' @param right data.frame: Right data frame.
#' @param sep Character: Separator between data frames' values.
#'
#' @return data.frame: Data frame with collapsed values and names.
#'
#' @author EDG
#' @keywords internal
#' @noRd
paste_dfs <- function(left, right, sep = "=>", decimal_places = 2L) {
  # Check inputs are data frames
  if (!inherits(left, "data.frame") || !inherits(right, "data.frame")) {
    cli::cli_abort(c(
      "x" = "Both {.arg left} and {.arg right} must be {.cls data.frame} objects.",
      "i" = "Got: {.cls {class(left)}} and {.cls {class(right)}}."
    ))
  }

  # Check dimensions match
  if (!identical(dim(left), dim(right))) {
    cli::cli_abort(c(
      "x" = "Data frames must have matching dimensions.",
      "i" = "Got dimensions: {dim(left)} and {dim(right)}."
    ))
  }

  # Paste values element-wise for each column
  result <- mapply(
    function(l, r) {
      paste(
        ddSci(l, decimal_places = decimal_places),
        ddSci(r, decimal_places = decimal_places),
        sep = sep
      )
    },
    left,
    right,
    SIMPLIFY = FALSE
  )

  # Handle column names
  left_colnames <- colnames(left)
  right_colnames <- colnames(right)

  if (identical(left_colnames, right_colnames)) {
    result_colnames <- left_colnames
  } else {
    result_colnames <- paste(left_colnames, right_colnames, sep = sep)
  }

  # Handle row names
  left_rownames <- rownames(left)
  right_rownames <- rownames(right)

  if (identical(left_rownames, right_rownames)) {
    result_rownames <- left_rownames
  } else {
    result_rownames <- paste(left_rownames, right_rownames, sep = sep)
  }

  # Create result data frame
  result_df <- as.data.frame(result, stringsAsFactors = FALSE)
  colnames(result_df) <- result_colnames
  rownames(result_df) <- result_rownames
  result_df
} # /rtemis::paste_dfs


================================================
FILE: R/utils_supervised.R
================================================
# super_ops.R
# ::rtemis::
# 2024- EDG rtemis.org

supervised_type <- function(dat) {
  if (is.factor(outcome(dat))) {
    "Classification"
  } else {
    "Regression"
  }
} # /rtemis::supervised_type

#' Convert probabilities to categorical (factor)
#'
#' @param x Numeric vector: Probabilities
#' @param levels Character vector: Class labels
#' @param binclasspos Integer: Index of the positive class for binary classification
#'
#' @return Factor
#' @author EDG
#'
#' @keywords internal
#' @noRd
#'
#' @examples
#' # Binary classification where "A" is the positive class, so .1 => B, .5 & .9 => A
#' prob2categorical(c(.1, .5, .9), c("A", "B"), 1)
#' # Binary classification where "B" is the positive class, so .1 => A, .5 & .9 => B
#' prob2categorical(c(.1, .5, .9), c("A", "B"), 2)
#' # Multi-class classification
#' prob <- matrix(c(.1, .3, .6, .05, .6, .35, .4, .3, .3), nrow = 3, byrow = TRUE)
#' prob2categorical(prob, c("A", "B", "C"))
prob2categorical <- function(x, levels, binclasspos = 2L) {
  n_classes <- length(levels)
  if (n_classes == 2) {
    # Binary classification
    stopifnot(binclasspos %in% c(1, 2))
    if (binclasspos == 1L) {
      levels <- rev(levels)
    }
    fitted <- factor(
      ifelse(x >= .5, 1, 0),
      levels = c(0, 1),
      labels = levels
    )
  } else {
    # Multi-class classification
    stopifnot(length(levels) == ncol(x))
    fitted <- factor(
      apply(x, 1, which.max),
      levels = seq_len(n_classes),
      labels = levels
    )
  }
  fitted
} # /rtemis::prob2categorical


#' @keywords internal
#' @noRd
check_supervised_inputs <- function(x, y = NULL) {
  if (is.null(y) && NCOL(x) < 2) {
    cli::cli_abort("y is missing")
  }
}

#' Move outcome to last column
#'
#' @param dat data.frame or similar.
#' @param outcome_column Character: Name of outcome column.
#'
#' @return object of same class as `data`
#'
#' @author EDG
#' @export
#'
#' @examples
#' ir <- set_outcome(iris, "Sepal.Length")
#' head(ir)
set_outcome <- function(dat, outcome_column) {
  # Get index of outcome column
  id <- grep(outcome_column, names(dat))
  # Check
  if (length(id) == 0) {
    cli::cli_abort('Column "{outcome_column}" not found in data.')
  }
  # Reorder columns
  # => Make S7 generic
  if (is.data.table(dat)) {
    dat[, c(setdiff(seq_len(NCOL(dat)), id), id), with = FALSE]
  } else {
    dat[, c(setdiff(seq_len(NCOL(dat)), id), id)]
  }
} # /rtemis::set_outcome


#' Make formula
#'
#' Makes a formula from a data.frame assuming the last column is the outcome
#'
#' @param x data.frame
#'
#' @return character
#' @author EDG
#'
#' @keywords internal
#' @noRd
make_formula <- function(x, output = "character") {
  outcome <- names(x)[NCOL(x)]
  out <- paste(outcome, "~ .")
  if (output == "formula") {
    as.formula(out, env = parent.env(parent.frame()))
  } else {
    out
  }
} # /rtemis::make_formula


# glm2table.R
# ::rtemis::
# 2021 EDG rtemis.org

#' Collect summary table from list of massGLMs with same predictors, different outcome
#' ("mass-y")
#'
#' @param x list of [glm] models
#' @param xnames Character, vector: names of models
#' @param include_anova Integer vector {1, 2, 3}: Output ANOVA Type I, II, and/or III
#' p-vals. Type I uses base R `anova()` (sequential); Types II and III use `car::Anova()`.
#' NA to skip.
#' @param info Logical: If TRUE, warn when values < than machine eps are replaced by
#' machine eps
#'
#' @return `data.table` with glm summaries
#' @author EDG
#'
#' @keywords internal
#' @noRd

glm2table <- function(x, xnames = NULL, include_anova = NA, info = TRUE) {
  if (is.null(xnames)) {
    xnames <- if (!is.null(names(x))) {
      names(x)
    } else {
      paste0("Variable_", seq_along(x))
    }
  }

  if (any(c(2L, 3L) %in% include_anova)) {
    check_dependencies("car")
  }

  out <- data.table(
    Variable = xnames,
    do.call(
      rbind,
      c(lapply(x, function(l) {
        out <- t(coef(summary(l))[-1, , drop = FALSE])
        varnames <- gsub(".*\\$", "", colnames(out))
        parnames <- c("Coefficient_", "SE_", "t_value_", "p_value_")
        out <- c(out)
        names(out) <- c(outer(parnames, varnames, paste0))
        out
      }))
    )
  )

  # Convert p-vals equal to 0 to machine double eps
  # eps <- .Machine[["double.eps"]]
  # pvals_idc <- getnames(out, starts_with = "p_value")
  # # appease R CMD check:, use with = FALSE, not ..i
  # for (i in pvals_idc) {
  #   lteps <- out[, i, with = FALSE] < eps
  #   if (length(lteps) > 0) {
  #     if (info) {
  #       cli::cli_inform("Values < machine double eps converted to double eps")
  #     }
  #     out[, i, with = FALSE][lteps] <- eps
  #   }
  # }

  term_labels <- x[[1]] |> terms() |> attr("term.labels")

  if (1 %in% include_anova) {
    pvals1 <- t(sapply(
      x,
      \(i) anova(i, test = "F")[seq_along(term_labels), 5]
    ))
    colnames(pvals1) <- paste(
      "p_value type I",
      term_labels
    )
    out <- cbind(out, pvals1)
  }

  if (2 %in% include_anova) {
    pvals2 <- t(sapply(
      x,
      \(i) car::Anova(i, type = 2)[seq_along(term_labels), 3]
    ))
    colnames(pvals2) <- paste(
      "p_value type II",
      term_labels
    )
    out <- cbind(out, pvals2)
  }

  if (3 %in% include_anova) {
    pvals3 <- t(sapply(
      x,
      \(i) car::Anova(i, type = 3)[seq_along(term_labels) + 1, 3]
    ))
    colnames(pvals3) <- paste(
      "p_value type III",
      term_labels
    )
    out <- cbind(out, pvals3)
  }

  out
} # /rtemis::glm2table


#' Collect summary table (p-values) from list of massGAMs with same predictors,
#' different outcome ("massy")
#'
#' @param mods list of [mgcv::gam] models.
#' @param modnames Character, vector: names of models.
#'
#' @return `data.table` with GAM p-value summaries.
#' @author EDG
#'
#' @keywords internal
#' @noRd
gam2table <- function(mods, modnames = NULL) {
  if (is.null(modnames)) {
    modnames <- if (!is.null(names(mods))) {
      names(mods)
    } else {
      paste0("Model_", seq_along(mods))
    }
  }

  out <- data.table(
    Variable = modnames,
    do.call(
      rbind,
      c(lapply(mods, get_gam_pvals))
    )
  )
  setnames(out, names(out)[-1], paste("p_value", names(out)[-1]))
  out
} # /rtemis::gam2table


#' Get GAM model's p-values for parametric and spline terms
#'
#' @keywords internal
#' @noRd
get_gam_pvals <- function(m, warn = TRUE) {
  eps <- .Machine[["double.eps"]]
  ms <- summary(m)
  pvals <- cbind(
    # s terms
    as.data.frame(t(ms[["s.table"]][, 4])),
    # p terms
    as.data.frame(t(ms[["p.table"]][, 4]))[-1]
  )
  lteps <- pvals < eps
  if (any(lteps)) {
    if (warn) {
      warning("Values < machine double eps converted to double eps")
    }
    pvals[lteps] <- eps
  }
  pvals
} # rtemis::get_gam_pvals


#' Class Imbalance
#'
#' Calculate class imbalance as given by:
#' \deqn{I = K\cdot\sum_{i=1}^K (n_i/N - 1/K)^2}{I = K * sum(n_i/N - 1/K)^2}
#' where \eqn{K} is the number of classes, and \eqn{n_i} is the number of
#' instances of class \eqn{i}
#'
#' @param x Vector, factor: Outcome.
#'
#' @return Numeric.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # iris is perfectly balanced
#' class_imbalance(iris[["Species"]])
#' # Simulate imbalanced outcome
#' x <- factor(sample(c("A", "B"), size = 500L, replace = TRUE, prob = c(0.9, 0.1)))
#' class_imbalance(x)
class_imbalance <- function(x) {
  if (!is.factor(x)) {
    cli::cli_abort("Input must be a factor")
  }
  K <- nlevels(x)
  N <- length(x)
  freq <- as.data.frame(table(x))

  K * sum(sapply(seq(K), function(i) (freq[["Freq"]][i] / N - 1 / K)^2))
} # /rtemis::class_imbalance


# expand_grid.R
# ::rtemis::
# 2025 EDG rtemis.org

#' Expand Grid
#'
#' Expand grid, converting NULL values to "null"
#'
#' Since the "null" characters in the resulting data.frame cannot be replaced to NULL,
#' they have to be converted back to NULL as needed downstream.
#' So make sure your data does not have cheeky character vector with "null" values in it that are
#' not actually NULLs.
#'
#' @param x named list
#'
#' @return data.frame
#'
#' @author EDG
#' @keywords internal
#' @noRd
#'
#' @examples
#' x <- list(a = c(1, 2, 3), b = NULL, c = c("z", "v"))
#' expand_grid(x)
expand_grid <- function(x, stringsAsFactors = FALSE) {
  stopifnot(is.list(x))
  # Convert all NULL to "null"
  x <- lapply(x, function(e) if (is.null(e)) "null" else e)
  # Expand grid
  expand.grid(x, stringsAsFactors = stringsAsFactors)
} # /expand_grid


================================================
FILE: R/utils_uniprot.R
================================================
# uniprot_get.R
# ::rtemis::
# 2022 E.D. Gennatas lambdamd.org

#' Get protein sequence from UniProt
#'
#' @param accession Character: UniProt Accession number - e.g. "Q9UMX9"
#' @param baseURL Character: UniProt rest API base URL.
#' Default = "https://rest.uniprot.org/uniprotkb"
#' @param verbosity Integer: Verbosity level.
#'
#' @return List with three elements: Identifier, Annotation, and Sequence.
#'
#' @author E.D. Gennatas
#' @export
#'
#' @examples
#' \dontrun{
#' # This gets the sequence from uniprot.org by default
#' mapt <- uniprot_get("Q9UMX9")
#' }
uniprot_get <- function(
  accession,
  baseURL = "https://rest.uniprot.org/uniprotkb",
  verbosity = 1
) {
  # Check types
  check_inherits(accession, "character")

  path <- paste0(baseURL, "/", accession, ".fasta")
  dat <- seqinr::read.fasta(path, seqtype = "AA")
  Annotation <- attr(dat[[1]], "Annot")
  Identifier <- gsub(" .*", "", Annotation)
  if (verbosity > 0L) {
    msg("Got:", highlight(Annotation))
  }

  list(
    Identifier = Identifier,
    Annotation = Annotation,
    Sequence = as.character(dat[[1]])
  )
} # /rtemis::uniprot_get


================================================
FILE: R/utils_xt.R
================================================
# xtdescribe.R
# ::rtemis::
# 2024 EDG

#' Describe longitudinal dataset
#'
#' This function emulates the `xtdescribe` function in Stata.
#'
#' @param x data.frame: Longitudinal data with ID and time variables.
#' @param id_col Integer: The column position of the ID variable.
#' @param time_col Integer: The column position of the time variable.
#' @param n_patterns Integer: The number of patterns to display.
#'
#' @return data.frame: Summary of participation patterns, returned invisibly.
#'
#' @author EDG
#' @export
#'
#' @examples
#' # Load example longitudinal dataset
#' data(xt_example)
#'
#' # Describe the longitudinal structure
#' xtdescribe(xt_example)
xtdescribe <- function(
  x,
  id_col = 1,
  time_col = 2,
  n_patterns = 9
) {
  id_name <- names(x)[id_col]
  time_name <- names(x)[time_col]
  # Print vec_describe of ID, with n = number of unique IDs
  id_us <- sort(unique(x[[id_name]]))
  n_ids <- length(id_us)
  time_us <- sort(unique(x[[time_name]]))
  time_min <- min(time_us)
  time_max <- max(time_us)
  leftwidth <- max(nchar(id_name), nchar(time_name))
  cat(
    pad_string(id_name, leftwidth),
    ": ",
    vec_describe(id_us),
    "                         n = ",
    length(id_us),
    "\n",
    sep = ""
  )
  cat(
    pad_string(time_name, leftwidth),
    ": ",
    vec_describe(time_us),
    "                         T = ",
    length(time_us),
    "\n",
    sep = ""
  )
  # Calculate delta for time variable
  # ?is this minimum delta?
  delta <- min(diff(time_us))
  cat(
    strrep(" ", leftwidth),
    "  Delta (",
    time_name,
    ") = ",
    delta,
    " unit\n",
    sep = ""
  )
  span <- ((max(time_us) - min(time_us)) / delta) + 1
  cat(
    strrep(" ", leftwidth),
    "  Span (",
    time_name,
    ") = ",
    span,
    " periods\n",
    sep = ""
  )
  #  Does id * time have unique values?
  id_time_unique <- length(unique(interaction(x[[id_name]], x[[time_name]]))) ==
    nrow(x)
  uid <- if (id_time_unique) {
    "uniquely identifies"
  } else {
    "does not uniquely identify"
  }
  cat(
    strrep(" ", leftwidth),
    "  (",
    id_name,
    "*",
    time_name,
    " ",
    uid,
    " each observation)\n",
    sep = ""
  )
  # Distribution of T_i at min, 5%, 25%, 50%, 75%, 95%, max
  cat(
    "\nDistribution of T_i:",
    "\tmin\t5%\t25%\t50%\t75%\t95%\tmax\n",
    sep = ""
  )
  id_freq <- table(x[[id_name]])
  id_freq_quant <- quantile(id_freq, c(0, 0.05, 0.25, 0.5, 0.75, 0.95, 1))
  cat(
    strrep(" ", 20),
    "\t",
    paste(id_freq_quant, collapse = "\t"),
    "\n",
    sep = ""
  )

  # Participation pattern by time
  # Get N IDs per time point, calculate pct of total IDs and rank
  id_time_freq <- table(x[[id_name]], x[[time_name]])
  # Insert columns of 0s for missing time points
  # id_time_freq <- cbind(id_time_freq, matrix(0, nrow = nrow(id_time_freq), ncol = span - ncol(id_time_freq)))
  # Add column names for missing time points
  missing_time_points <- setdiff(seq(time_min, time_max, by = delta), time_us)
  missing <- matrix(
    0,
    nrow = nrow(id_time_freq),
    ncol = length(missing_time_points)
  )
  colnames(missing) <- missing_time_points
  id_time_freq <- cbind(id_time_freq, missing)
  # Re order columns by name
  id_time_freq <- id_time_freq[, order(as.numeric(colnames(id_time_freq)))]

  # Convert to pattern matrix by pasting all columns by row
  # id_time_freq_char <- as.matrix(id_time_freq)
  # id_time_freq_char[id_time_freq_char == 0] <- "."
  id_time_freq_char <- matrix(
    as.character(id_time_freq),
    nrow = nrow(id_time_freq)
  )
  id_time_freq_char[id_time_freq_char == "0"] <- "."
  id_time_freq_pattern <- apply(id_time_freq_char, 1, paste, collapse = "")
  id_time_freq_pattern_freq <- table(id_time_freq_pattern)
  id_time_freq_pattern_sorted <- sort(
    id_time_freq_pattern_freq,
    decreasing = TRUE
  )

  # Make data.frame with Frequency, Percent, Cumulative Percent of top n_patterns and rest
  pattern_summary <- data.frame(
    `Freq.` = as.numeric(id_time_freq_pattern_sorted)[seq_len(n_patterns)]
  )
  pattern_summary[["Percent"]] <- round(
    (pattern_summary[["Freq."]] / n_ids) * 100,
    digits = 2
  )
  pattern_summary[["Cum."]] <- cumsum(pattern_summary[["Percent"]])
  pattern_summary[["Pattern"]] <- names(id_time_freq_pattern_sorted)[seq_len(
    n_patterns
  )]
  # Add Freq, Percent, Cumulative Percent of rest
  pattern_summary <- rbind(
    pattern_summary,
    data.frame(
      `Freq.` = sum(id_time_freq_pattern_sorted[-seq_len(n_patterns)]),
      Percent = round(
        (sum(id_time_freq_pattern_sorted[-seq_len(n_patterns)]) / n_ids) * 100,
        digits = 2
      ),
      `Cum.` = "100.00",
      Pattern = "(other patterns)"
    )
  )
  # Missing pattern is X for time points with data and . for time points with no data
  missing_pattern <- rep("X", ncol(id_time_freq))
  missing_pattern[colSums(id_time_freq) == 0] <- "."
  missing_pattern <- paste(missing_pattern, collapse = "")
  # Add row with Total
  pattern_summary <- rbind(
    pattern_summary,
    data.frame(
      `Freq.` = n_ids,
      Percent = "100.00",
      `Cum.` = "",
      Pattern = missing_pattern
    )
  )

  print(pattern_summary, row.names = FALSE)
  invisible(pattern_summary)
} # /rtemis::xtdescribe


#' Describe vector
#'
#' Helper function to describe a vector by showing the first 2 and last value, separated by "..."
#'
#' @param x Vector to describe.
#' @param sort_unique Logical: If TRUE, sort the unique values of the vector before describing.
#'
#' @return Character string describing the vector.
#'
#' @author EDG
#' @keywords internal
#' @noRd
vec_describe <- function(x, sort_unique = FALSE) {
  # sort_unique defaults to FALSE since it needs to be computed already
  # within xtdescribe
  xs <- if (sort_unique) sort(unique(x)) else x
  paste(xs[1], xs[2], "...", xs[length(xs)], sep = ", ")
}


================================================
FILE: R/zzz.R
================================================
# ▄▄▄▄  ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪  .▄▄ ·
# ▀▄  █·•██  ▀▄.▀··██ ▐███▪██ ▐█ ▀.
# ▐▀▀▀▄  ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄
# ▐█•  █ ▐█▌·▐█▄▄▌██ ██▌▐█▌▐█▌▐█▄▪▐█
# .▀  ▀  ▀▀▀  ▀▀▀ ▀▀  █▪▀▀▀▀▀▀ ▀▀▀▀

# zzz.R
# ::rtemis::
# 2016- EDG rtemis.org

# rtemis internal environment
live <- new.env()
live[["parallelized_learners"]] <- c(
  "LightGBM",
  "LightRF",
  "LightRuleFit",
  "Ranger"
)
# msg() sink. NULL = console output (default).
# When set to a function, msg()/msg0()/msgstart()/msgdone() route their
# structured output through it instead of writing to the console.
# Used by rtemislive to forward training messages over a WebSocket.
# See `set_msg_sink()`.
live[["msg_sink"]] <- NULL

# vars
rtemis_version <- packageVersion("rtemis")
cores_available <- parallelly::availableCores()
cores_to_use <- max(cores_available - 3L, 1L)

# References
# Unicode emojis: https://www.unicode.org/emoji/charts/full-emoji-list.html

# Progress reporting
setup_progress <- function() {
  progressr::handlers(global = TRUE)
  progressr::handlers(
    progressr::handler_cli(
      format = "{cli::pb_spin} [{pb_current}/{pb_total}] {pb_status}",
      format_done = "{cli::col_green(cli::symbol$tick)} Completed {pb_total} tasks",
      show_after = 0,
      clear = FALSE
    )
  )
}

.onLoad <- function(libname, pkgname) {
  # S7
  S7::methods_register()
  # Set default options if not already set by user
  if (is.null(getOption("rtemis_theme"))) {
    options(rtemis_theme = "whitegrid")
  }
  if (is.null(getOption("rtemis_palette"))) {
    options(rtemis_palette = "rtms")
  }
  if (is.null(getOption("rtemis_font"))) {
    options(rtemis_font = "Helvetica")
  }
  # setup_progress()
}

.onAttach <- function(libname, pkgname) {
  if (interactive()) {
    # setup_progress()
    vline <- paste0(
      "\n  .:",
      bold(pkgname),
      " v.",
      rtemis_version,
      " \U1F30A",
      " ",
      sessionInfo()[[2]],
      " (",
      cores_available,
      " cores available)\n  "
    )
    packageStartupMessage(paste0(
      pkglogo(),
      vline,
      fmt_gradient(
        paste0(rep("\u2500", nchar(vline) - 13L), collapse = ""),
        colors = c(rt_red, rt_orange, rt_red)
      ),
      bold("\n  Defaults"),
      "\n  \u2502   ",
      gray("Theme: "),
      getOption("rtemis_theme", "whitegrid"),
      "\n  \u2502    ",
      gray("Font: "),
      getOption("rtemis_font", "Helvetica"),
      "\n  \u2514 ",
      gray("Palette: "),
      getOption("rtemis_palette", "rtms"),
      bold("\n  Resources"),
      "\n  \u2502    ",
      gray("Docs:"),
      " https://docs.rtemis.org/r/ml",
      "\n  \u2502 ",
      gray("Learn R:"),
      " https://pdsr.rtemis.org",
      "\n  \u2514    ",
      gray("Cite: "),
      rtcitation,
      "\n\n  ",
      fmt("PSA:", col = rt_red, bold = TRUE),
      " Do not throw data at algorithms. Compute responsibly!"
    ))
  } else {
    packageStartupMessage(
      paste0(
        "  .:",
        pkgname,
        " ",
        rtemis_version,
        " \U1F30A",
        " ",
        sessionInfo()[[2]]
      )
    )
  }
}


================================================
FILE: README.md
================================================
[![CRAN status](https://www.r-pkg.org/badges/version/rtemis)](https://CRAN.R-project.org/package=rtemis)
[![r-universe](https://rtemis-org.r-universe.dev/badges/rtemis)](https://rtemis-org.r-universe.dev/rtemis)
[![R-CMD-check](https://github.com/rtemis-org/rtemis/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/rtemis-org/rtemis/actions/workflows/R-CMD-check.yaml)

# rtemis: Advanced Machine Learning &amp; Visualization.

[![rtemis cover](https://www.rtemis.org/rtemis-cover.avif)](https://docs.rtemis.org/r/ml/)

This is the new version of the rtemis R package and remains under active development.

The new version (0.99+) features:

- Backend: complete rewrite of the supervised and unsupervised learning backend using the new [**S7** class system](https://github.com/RConsortium/S7), replacing all previous use of R6 and S3 classes.
- API: **Functional user-facing API**, to maintain a consistent, user-friendly interface.
- Extended use of **`setup_()`** functions, to offer increased transparency of configuration options.
- Strict **type checking** and **condition validation** throughout to minimize user error and provide highly focused error messages & suggestions.
- Expanded transparent messaging through each step.

![rtemis v1.0.0 console ascii](https://www.rtemis.org/rtemis-splash.webp)

## Installation

### From CRAN

```{r}
pak::pak("rtemis")
```

or

```r
install.packages("rtemis")
```

### Latest version from GitHub

```r
pak::pak("rtemis-org/rtemis")
```

### Latest version from `r-universe`

```r
pak::repo_add(myuniverse = "https://rtemis-org.r-universe.dev")
pak::pak("rtemis")
```

or using `install.packages`:

```r
install.packages(
  'rtemis',
  repos = c('https://rtemis-org.r-universe.dev', 'https://cloud.r-project.org')
)
```

## Installation of dependencies

Every `rtemis` call that uses external packages includes a check for required dependencies and will print a message if any are missing.

## Transparent messaging

It is essential to maintain transparency of operations at all times.
`rtemis` functions often call multiple other functions, sometime recursively. The package uses a formatted messaging system to provide logging output which includes:

- Timestamp
- Message
- Origin (function name)

Most function include a `verbosity` argument to control the level of messaging output, with support for three levels:

- `0`: silent
- `1`: normal messaging
- `2`: detailed messaging for debugging

## Text formatting

`rtemis` includes an automatic text formatting system, which supports:

- plain text output (for output to log files)
- ANSI colored output (for R console)
- HTML formatted output (for Quarto documents, shiny apps, etc.)

## `setup_` functions

Machine learning workflows involve multiple steps, each with their own configuration options.

It is essential that a) the user has complete control over each step, while maintaining an intuitive, user-friendly interface, and b) the user input is validated immediately and before a potentially long-running operation is started.

The following `setup_` functions are available to configure each step of the workflow:

- Supervised Learning:  `setup_CART()`, `setup_GAM()`, etc.
- Tuning: `setup_GridSearch()`
- Clustering: `setup_CMeans()`, `setup_HardCL()`, etc.
- Decomposition: `setup_NMF()`, `setup_ICA()`, etc.
- Resampling: `setup_Resampler()`
- Preprocessing: `setup_Preprocessor()`

## Supervised Learning

The following will perform hyperparameter tuning and 10-fold cross-validation.  
It will train `(3*3*2*5 + 1) * 25 = 2275` models total (!).

```r
mod <- train(
  dat,
  hyperparameters = setup_LightGBM(
        num_leaves = 2^(1:3),
        learning_rate = c(.001, .005, .01),
        subsample = c(.6, .9)
  ),
  outer_resampling_config = setup_Resampler(
    n_resamples = 25L,
    type = "StratSub"
  )
)
```

## Clustering

```r
clust <- cluster(
  dat,
  config = setup_CMeans(k = 4L)
)
```

## Decomposition

```r
decomp <- decompose(
  dat,
  config = setup_ICA(k = 12L)
)
```

## Changes from original implementation & Ongoing work

### Algorithms

The original version included a long list of algorithms for supervised and unsupervised learning for testing and experimentation, many of which were rarely used.
The initial release of the new version focuses on a smaller set of core algorithms, that will keep growing.

### Visualization

The original version included the `mplot3` family of visualization functions using base R graphics and the `dplot3` family using `plotly`.
The new release includes the `draw` family of functions, the evolution of the `dplot3` family.

## Documentation

The documentation is available at [docs.rtemis.org/r/ml](https://docs.rtemis.org/r/ml), which includes
walkthroughs of main features and full API reference.

## Ongoing work

There is a lot more coming - both within this package and the other packages in the rtemis framework.

## rtemisalpha

The original, unmaintained version of rtemis remains available as `rtemisalpha` at [rtemis-org/rtemis-legacy](https://github.com/rtemis-org/rtemis-legacy).

---

© 2016–2026 E.D. Gennatas. Licensed under [GPL (>= 3)](https://www.gnu.org/licenses/gpl-3.0.html).


================================================
FILE: data-raw/create_xt_example.R
================================================
# Create synthetic longitudinal dataset for xtdescribe() example
# ::rtemis::
# 2025 EDG

set.seed(2025)

# Create a small longitudinal dataset with various participation patterns
# 10 participants measured at up to 5 time points (years 2020-2024)

patient_id <- c(
  rep(1, 5), # Complete participation (all 5 time points)
  rep(2, 5), # Complete participation
  rep(3, 4), # Missing last time point
  rep(4, 4), # Missing first time point
  rep(5, 3), # Only first, middle, last
  rep(6, 2), # Only first two time points
  rep(7, 2), # Only last two time points
  rep(8, 3), # Missing 2nd and 4th time points
  rep(9, 1), # Only baseline
  rep(10, 1) # Only final time point
)

year <- c(
  2020:2024, # ID 1: all years
  2020:2024, # ID 2: all years
  2020:2023, # ID 3: missing 2024
  2021:2024, # ID 4: missing 2020
  c(2020, 2022, 2024), # ID 5: intermittent
  2020:2021, # ID 6: early dropout
  2023:2024, # ID 7: late entry
  c(2020, 2022, 2024), # ID 8: intermittent
  2020, # ID 9: baseline only
  2024 # ID 10: final only
)

# Generate outcome variable (e.g., blood pressure)
blood_pressure <- round(rnorm(length(patient_id), mean = 120, sd = 15), 1)

# Generate treatment group
treatment <- rep(c("A", "B"), length.out = length(patient_id))

# Create data.frame
xt_example <- data.frame(
  patient_id = patient_id,
  year = year,
  blood_pressure = blood_pressure,
  treatment = treatment
)

# Save to data/
usethis::use_data(xt_example, overwrite = TRUE)


================================================
FILE: inst/CITATION
================================================
utils::bibentry(
  header = "To cite rtemis in publications, please use:",

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # BibTeX:
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  bibtype = "PhdThesis",
  title = "Towards Precision Psychiatry: Gray Matter Development and Cognition in Adolescence",
  author = "Efstathios D. Gennatas",
  year = "2017",
  school = "University of Pennsylvania",
  
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Plain text:
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  textVersion = "Gennatas, E. D. (2017). Towards Precision Psychiatry: Gray Matter Development and Cognition in Adolescence (Doctoral dissertation, University of Pennsylvania)."
)


================================================
FILE: inst/resources/rtemis.utf8
================================================
▄▄▄▄  ▄▄▄▄▄▄▄▄ .• ▌ ▄ ·. ▪  .▄▄ ·
▀▄  █·•██  ▀▄.▀··██ ▐███▪██ ▐█ ▀.
▐▀▀▀▄  ▐█.▪▐▀▀▪▄▐█ ▌▐▌▐█·▐█·▄▀▀▀█▄
▐█•  █ ▐█▌·▐█▄▄▌██ ██▌▐█▌▐█▌▐█▄▪▐█
.▀  ▀  ▀▀▀  ▀▀▀ ▀▀  █▪▀▀▀▀▀▀ ▀▀▀▀


================================================
FILE: inst/resources/rtemis2.utf8
================================================
 ██▀███  ▄▄▄█████▓▓█████  ███▄ ▄███▓ ██▓  ██████ 
▓██ ▒ ██▒▓  ██▒ ▓▒▓█   ▀ ▓██▒▀█▀ ██▒▓██▒▒██    ▒ 
▓██ ░▄█ ▒▒ ▓██░ ▒░▒███   ▓██  █ ▓██░▒██▒░ ▓██▄   
▒██▀▀█▄  ░ ▓██▓ ░ ▒▓█  ▄ ▒██  ░ ▒██ ░██░  ▒   ██▒
░██▓ ▒██▒  ▒██▒ ░ ░▒████▒▒██▒   ░██▒░██░▒██████▒▒
░ ▒▓ ░▒▓░  ▒ ░░   ░░ ▒░ ░░ ▒░ ░ ░  ░░▓  ▒ ▒▓▒ ▒ ░
  ░▒ ░ ▒░    ░     ░ ░  ░░  ░      ░ ▒ ░░ ░▒  ░ ░
  ░░   ░   ░         ░   ░      ░    ▒ ░░  ░  ░  
   ░                 ░  ░       ░    ░        ░  


================================================
FILE: man/available_algorithms.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/algorithmDB.R
\name{available_supervised}
\alias{available_supervised}
\alias{available_algorithms}
\alias{available_clustering}
\alias{available_decomposition}
\title{Available Algorithms}
\usage{
available_supervised(verbosity = 1L)

available_clustering(verbosity = 1L)

available_decomposition(verbosity = 1L)
}
\arguments{
\item{verbosity}{Integer: Verbosity level.}
}
\value{
Named list of algorithm descriptions, invisibly.
}
\description{
Print available algorithms for supervised learning, clustering, and decomposition.
}
\examples{
available_supervised()
available_clustering()
available_decomposition()
}
\author{
EDG
}


================================================
FILE: man/available_draw.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/algorithmDB.R
\name{available_draw}
\alias{available_draw}
\title{Available Draw Functions}
\usage{
available_draw(verbosity = 1L)
}
\arguments{
\item{verbosity}{Integer: Verbosity level.}
}
\value{
Named list of draw function descriptions, invisibly.
}
\description{
Print available draw functions for visualization.
}
\examples{
available_draw()
}
\author{
EDG
}


================================================
FILE: man/available_themes.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/theme.R
\name{available_themes}
\alias{available_themes}
\title{Print available \pkg{rtemis} themes}
\usage{
available_themes()
}
\value{
Called for its side effect of printing available themes.
}
\description{
Print available \pkg{rtemis} themes
}
\examples{
available_themes()
}
\author{
EDG
}


================================================
FILE: man/calibrate.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{calibrate}
\alias{calibrate}
\title{Calibrate \code{Classification} & \code{ClassificationRes} Models}
\usage{
calibrate(
  x,
  algorithm = "isotonic",
  hyperparameters = NULL,
  verbosity = 1L,
  ...
)
}
\arguments{
\item{x}{\code{Classification} or \code{ClassificationRes} object to calibrate.}

\item{algorithm}{Character: Algorithm to use to train calibration model.}

\item{hyperparameters}{\code{Hyperparameters} object: Setup using one of \verb{setup_*} functions.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Additional arguments passed to specific methods.}
}
\value{
Calibrated model object.
}
\description{
Generic function to calibrate binary classification models.
}
\details{
The goal of calibration is to adjust the predicted probabilities of a binary classification
model so that they better reflect the true probabilities (i.e. empirical risk) of the positive
class.
}
\section{Method-specific parameters}{


\strong{For \code{Classification} objects:}
\itemize{
\item \code{predicted_probabilities}: Numeric vector of predicted probabilities
\item \code{true_labels}: Factor of true class labels
}

\strong{For \code{ClassificationRes} objects:}
\itemize{
\item \code{resampler_config}: \code{ResamplerConfig} object for calibration training
\item \code{train_verbosity}: Integer controlling calibration model training output
}
}

\examples{
# --- Calibrate Classification ---
dat <- iris[51:150, ]
res <- resample(dat)
dat$Species <- factor(dat$Species)
dat_train <- dat[res[[1]], ]
dat_test <- dat[-res[[1]], ]

# Train GLM on a training/test split
mod_c_glm <- train(
  x = dat_train,
  dat_test = dat_test,
  algorithm = "glm"
)

# Calibrate the `Classification` by defining `predicted_probabilities` and `true_labels`,
# in this case using the training data, but it could be a separate calibration dataset.
mod_c_glm_cal <- calibrate(
  mod_c_glm,
  predicted_probabilities = mod_c_glm$predicted_prob_training,
  true_labels = mod_c_glm$y_training
)
mod_c_glm_cal

# --- Calibrate ClassificationRes ---

# Train GLM with cross-validation
resmod_c_glm <- train(
  x = dat,
  algorithm = "glm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)

# Calibrate the `ClassificationRes` using the same resampling configuration as used for training.
resmod_c_glm_cal <- calibrate(resmod_c_glm)
resmod_c_glm_cal
}
\author{
EDG
}


================================================
FILE: man/check_data.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/check_data.R
\name{check_data}
\alias{check_data}
\title{Check Data}
\usage{
check_data(
  x,
  name = NULL,
  get_duplicates = TRUE,
  get_na_case_pct = FALSE,
  get_na_feature_pct = FALSE
)
}
\arguments{
\item{x}{tabular data: Input to be checked.}

\item{name}{Character: Name of dataset.}

\item{get_duplicates}{Logical: If TRUE, check for duplicate cases.}

\item{get_na_case_pct}{Logical: If TRUE, calculate percent of NA values per
case.}

\item{get_na_feature_pct}{Logical: If TRUE, calculate percent of NA values
per feature.}
}
\value{
\code{CheckData} object.
}
\description{
Check Data
}
\examples{
n <- 1000
x <- rnormmat(n, 50, return_df = TRUE)
x$char1 <- sample(letters, n, TRUE)
x$char2 <- sample(letters, n, TRUE)
x$fct <- factor(sample(letters, n, TRUE))
x <- rbind(x, x[1, ])
x$const <- 99L
x[sample(nrow(x), 20), 3] <- NA
x[sample(nrow(x), 20), 10] <- NA
x$fct[30:35] <- NA
check_data(x)
}
\author{
EDG
}


================================================
FILE: man/choose_theme.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/theme.R
\name{choose_theme}
\alias{choose_theme}
\title{Select an rtemis theme}
\usage{
choose_theme(
  x = c("white", "whitegrid", "whiteigrid", "black", "blackgrid", "blackigrid",
    "darkgray", "darkgraygrid", "darkgrayigrid", "lightgraygrid", "mediumgraygrid"),
  override = NULL
)
}
\arguments{
\item{x}{Character: Name of theme to select. If not defined, will use \code{getOption("rtemis_theme", "whitegrid")}.}

\item{override}{Optional List: Theme parameters to override defaults.}
}
\value{
\code{Theme} object.
}
\description{
Select an rtemis theme
}
\details{
If \code{x} is not defined, \code{choose_theme()} will use \code{getOption("rtemis_theme", "whitegrid")} to
select the theme. This allows users to set a default theme for all rtemis plots by setting
\code{options(rtemis_theme = "theme_name")} at any point.
}
\examples{
# Get default theme set by options(rtemis_theme = "theme_name").
# If not set, defaults to "whitegrid":
choose_theme()
# Get darkgraygrid theme. Same as `theme_darkgraygrid()`:
choose_theme("darkgraygrid")
# This will use the default theme, and override the foreground color to red:
choose_theme(override = list(fg = "#ff0000"))
}
\author{
EDG
}


================================================
FILE: man/class_imbalance.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_supervised.R
\name{class_imbalance}
\alias{class_imbalance}
\title{Class Imbalance}
\usage{
class_imbalance(x)
}
\arguments{
\item{x}{Vector, factor: Outcome.}
}
\value{
Numeric.
}
\description{
Calculate class imbalance as given by:
\deqn{I = K\cdot\sum_{i=1}^K (n_i/N - 1/K)^2}{I = K * sum(n_i/N - 1/K)^2}
where \eqn{K} is the number of classes, and \eqn{n_i} is the number of
instances of class \eqn{i}
}
\examples{
# iris is perfectly balanced
class_imbalance(iris[["Species"]])
# Simulate imbalanced outcome
x <- factor(sample(c("A", "B"), size = 500L, replace = TRUE, prob = c(0.9, 0.1)))
class_imbalance(x)
}
\author{
EDG
}


================================================
FILE: man/classification_metrics.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metrics.R
\name{classification_metrics}
\alias{classification_metrics}
\title{Classification Metrics}
\usage{
classification_metrics(
  true_labels,
  predicted_labels,
  predicted_prob = NULL,
  binclasspos = 2L,
  calc_auc = TRUE,
  calc_brier = TRUE,
  auc_method = "lightAUC",
  sample = character(),
  verbosity = 0L
)
}
\arguments{
\item{true_labels}{Factor: True labels.}

\item{predicted_labels}{Factor: predicted values.}

\item{predicted_prob}{Numeric vector: predicted probabilities.}

\item{binclasspos}{Integer: Factor level position of the positive class in binary classification.}

\item{calc_auc}{Logical: If TRUE, calculate AUC. May be slow in very large datasets.}

\item{calc_brier}{Logical: If TRUE, calculate Brier_Score.}

\item{auc_method}{Character: "lightAUC", "pROC", "ROCR".}

\item{sample}{Character: Sample name.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{ClassificationMetrics} object.
}
\description{
Classification Metrics
}
\details{
Note that auc_method = "pROC" is the only one that will output an AUC even if
one or more predicted probabilities are NA.
}
\examples{
# Assume positive class is "b"
true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a"))
predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)

classification_metrics(true_labels, predicted_labels, predicted_prob)
classification_metrics(true_labels, predicted_labels, 1 - predicted_prob, binclasspos = 1L)
}
\author{
EDG
}


================================================
FILE: man/clean_colnames.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_strings.R
\name{clean_colnames}
\alias{clean_colnames}
\title{Clean column names}
\usage{
clean_colnames(x, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE)
}
\arguments{
\item{x}{Character vector OR any object with \code{colnames()} method, like matrix, data.frame,
data.table, tibble, etc.}

\item{lowercase}{Logical: If TRUE, convert to lowercase.}

\item{uppercase}{Logical: If TRUE, convert to uppercase.}

\item{titlecase}{Logical: If TRUE, convert to Title Case.}
}
\value{
Character vector with cleaned names.
}
\description{
Clean column names by replacing all spaces and punctuation with a single underscore
}
\examples{
clean_colnames(iris, lowercase = FALSE, uppercase = FALSE, titlecase = FALSE)
}
\author{
EDG
}


================================================
FILE: man/clean_names.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_strings.R
\name{clean_names}
\alias{clean_names}
\title{Clean names}
\usage{
clean_names(x, sep = "_", prefix_digits = "V_")
}
\arguments{
\item{x}{Character vector.}

\item{sep}{Character: Separator to replace symbols with.}

\item{prefix_digits}{Character: prefix to add to names beginning with a
digit. Set to NA to skip.}
}
\value{
Character vector.
}
\description{
Clean character vector by replacing all symbols and sequences of symbols with single
underscores, ensuring no name begins or ends with a symbol
}
\examples{
x <- c("Patient ID", "_Date-of-Birth", "SBP (mmHg)")
x
clean_names(x)
clean_names(x, sep = " ")
}
\author{
EDG
}


================================================
FILE: man/cluster.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster.R
\name{cluster}
\alias{cluster}
\title{Perform Clustering}
\usage{
cluster(x, algorithm = "KMeans", config = NULL, verbosity = 1L)
}
\arguments{
\item{x}{Matrix or data.frame: Data to cluster. Rows are cases to be clustered.}

\item{algorithm}{Character: Clustering algorithm.}

\item{config}{List: Algorithm-specific config.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{Clustering} object.
}
\description{
Perform clustering on the rows (usually cases) of a dataset.
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.
}
\examples{
iris_km <- cluster(exc(iris, "Species"), algorithm = "KMeans")
}
\author{
EDG
}


================================================
FILE: man/col2grayscale.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_color.R
\name{col2grayscale}
\alias{col2grayscale}
\title{Color to Grayscale}
\usage{
col2grayscale(x, what = c("color", "decimal"))
}
\arguments{
\item{x}{Color to convert to grayscale}

\item{what}{Character: "color" returns a hexadecimal color,
"decimal" returns a decimal between 0 and 1}
}
\value{
Character: color hex code.
}
\description{
Convert a color to grayscale
}
\details{
Uses the NTSC grayscale conversion:
0.299 * R + 0.587 * G + 0.114 * B
}
\examples{
col2grayscale("red")
col2grayscale("red", "dec")
}
\author{
EDG
}


================================================
FILE: man/color_adjust.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_color.R
\name{color_adjust}
\alias{color_adjust}
\title{Adjust HSV Color}
\usage{
color_adjust(color, alpha = NULL, hue = 0, sat = 0, val = 0)
}
\arguments{
\item{color}{Input color. Any format that grDevices::col2rgb() recognizes}

\item{alpha}{Numeric: Scale alpha by this amount. Future: replace with absolute setting}

\item{hue}{Float: How much hue to add to \code{color}}

\item{sat}{Float: How much saturation to add to \code{color}}

\item{val}{Float: How much to increase value of \code{color} by}
}
\value{
Adjusted color
}
\description{
Modify alpha, hue, saturation and value (HSV) of a color
}
\examples{
previewcolor(c(teal = "#00ffff", teal50 = color_adjust("#00ffff", alpha = 0.5)))
}
\author{
EDG
}


================================================
FILE: man/ddSci.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ddSci.R
\name{ddSci}
\alias{ddSci}
\title{Format Numbers for Printing}
\usage{
ddSci(x, decimal_places = 2, hi = 1e+06, as_numeric = FALSE)
}
\arguments{
\item{x}{Vector of numbers}

\item{decimal_places}{Integer: Return this many decimal places.}

\item{hi}{Float: Threshold at or above which scientific notation is used.}

\item{as_numeric}{Logical: If TRUE, convert to numeric before returning.
This will not force all numbers to print 2 decimal places. For example:
1.2035 becomes "1.20" if \code{as_numeric = FALSE}, but 1.2 otherwise
This can be helpful if you want to be able to use the output as numbers / not just for printing.}
}
\value{
Formatted number
}
\description{
2 Decimal places, otherwise scientific notation
}
\details{
Numbers will be formatted to 2 decimal places, unless this results in 0.00 (e.g. if input was .0032),
in which case they will be converted to scientific notation with 2 significant figures.
\code{ddSci} will return \code{0.00} if the input is exactly zero.
This function can be used to format numbers in plots, on the console, in logs, etc.
}
\examples{
x <- .34876549
ddSci(x)
# "0.35"
x <- .00000000457823
ddSci(x)
# "4.6e-09"
}
\author{
EDG
}


================================================
FILE: man/ddb_collect.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ddb.R
\name{ddb_collect}
\alias{ddb_collect}
\title{Collect a lazy-read duckdb table}
\usage{
ddb_collect(sql, progress = TRUE, returnobj = c("data.frame", "data.table"))
}
\arguments{
\item{sql}{Character: DuckDB SQL query, usually output of
\link{ddb_data} with \code{collect = FALSE}}

\item{progress}{Logical: If TRUE, show progress bar}

\item{returnobj}{Character: data.frame or data.table: class of object to return}
}
\value{
\code{data.frame} or \code{data.table}.
}
\description{
Collect a table read with \code{ddb_data(x, collect = FALSE)}
}
\examples{
\dontrun{
  # Requires local CSV file; replace with your own path
  sql <- ddb_data("/Data/iris.csv", collect = FALSE)
  ir <- ddb_collect(sql)
}
}
\author{
EDG
}


================================================
FILE: man/ddb_data.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ddb.R
\name{ddb_data}
\alias{ddb_data}
\title{Read CSV using DuckDB}
\usage{
ddb_data(
  filename,
  datadir = NULL,
  sep = ",",
  header = TRUE,
  quotechar = "",
  ignore_errors = TRUE,
  make_unique = TRUE,
  select_columns = NULL,
  filter_column = NULL,
  filter_vals = NULL,
  character2factor = FALSE,
  collect = TRUE,
  progress = TRUE,
  returnobj = c("data.table", "data.frame"),
  data.table.key = NULL,
  clean_colnames = TRUE,
  verbosity = 1L
)
}
\arguments{
\item{filename}{Character: file name; either full path or just the file name,
if \code{datadir} is also provided.}

\item{datadir}{Character: Optional path if \code{filename} is not full path.}

\item{sep}{Character: Field delimiter/separator.}

\item{header}{Logical: If TRUE, first line will be read as column names.}

\item{quotechar}{Character: Quote character.}

\item{ignore_errors}{Logical: If TRUE, ignore parsing errors (sometimes it's
either this or no data, so).}

\item{make_unique}{Logical: If TRUE, keep only unique rows.}

\item{select_columns}{Character vector: Column names to select.}

\item{filter_column}{Character: Name of column to filter on, e.g. "ID".}

\item{filter_vals}{Numeric or Character vector: Values in \code{filter_column} to keep.
\code{filter_column} to keep.}

\item{character2factor}{Logical: If TRUE, convert character columns to
factors.}

\item{collect}{Logical: If TRUE, collect data and return structure class
as defined by \code{returnobj}.}

\item{progress}{Logical: If TRUE, print progress (no indication this works).}

\item{returnobj}{Character: "data.frame" or "data.table" object class to
return. If "data.table", data.frame object returned from
\code{DBI::dbGetQuery} is passed to \code{data.table::setDT}; will add to
execution time if very large, but then that's when you need a data.table.}

\item{data.table.key}{Character: If set, this corresponds to a column name in the
dataset. This column will be set as key in the data.table output.}

\item{clean_colnames}{Logical: If TRUE, clean colnames with
\link{clean_colnames}.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
data.frame or data.table if \code{collect} is TRUE, otherwise a character with the SQL query
}
\description{
Lazy-read a CSV file, optionally: filter rows, remove duplicates,
clean column names, convert character to factor, collect.
}
\examples{
\dontrun{
  # Requires local CSV file; replace with your own path
  ir <- ddb_data("/Data/massive_dataset.csv",
    filter_column = "ID",
    filter_vals = 8001:9999
  )
}
}
\author{
EDG
}


================================================
FILE: man/decomp.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/decomp.R
\name{decomp}
\alias{decomp}
\title{Perform Data Decomposition}
\usage{
decomp(x, algorithm = "ICA", config = NULL, verbosity = 1L)
}
\arguments{
\item{x}{Matrix or data frame: Input data.}

\item{algorithm}{Character: Decomposition algorithm.}

\item{config}{DecompositionConfig: Algorithm-specific config.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{Decomposition} object.
}
\description{
Perform linear or non-linear decomposition of numeric data.
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.
}
\examples{
iris_pca <- decomp(exc(iris, "Species"), algorithm = "PCA")
}
\author{
EDG
}


================================================
FILE: man/describe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{describe}
\alias{describe}
\title{Describe object}
\usage{
describe(x, ...)
}
\arguments{
\item{x}{R object to describe. See method documentation for supported classes.}

\item{...}{Additional arguments passed to methods. See details.}
}
\description{
Describe object
}
\details{
Extra arguments for \code{factor} method:
\itemize{
\item \code{max_n}: Integer: Return counts for up to this many levels.
\item \code{return_ordered}: Logical: If TRUE, return levels ordered by count, otherwise return in level order.
\item \code{verbosity}: Integer: Verbosity level.
}
}
\examples{
# --- For `Supervised` objects ---
species_lightrf <- train(iris, algorithm = "lightrf")
describe(species_lightrf)

# --- For `SupervisedRes` objects ---
mod <- train(iris, algorithm = "CART", outer_resampling_config = setup_Resampler())
describe(mod)

# --- For factors ---
# Small number of levels
describe(iris[["Species"]])

# Large number of levels: show top n by count
x <- factor(sample(letters, 1000, TRUE))
describe(x)
describe(x, 3)
describe(x, 3, return_ordered = FALSE)
}
\author{
EDG
}


================================================
FILE: man/df_movecolumn.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_df.R
\name{df_movecolumn}
\alias{df_movecolumn}
\title{Move data frame column}
\usage{
df_movecolumn(x, colname, to = ncol(x))
}
\arguments{
\item{x}{data.frame.}

\item{colname}{Character: Name of column you want to move.}

\item{to}{Integer: Which column position to move the vector to.
Default = \code{ncol(x)} i.e. the last column.}
}
\value{
data.frame
}
\description{
Move data frame column
}
\examples{
ir <- df_movecolumn(iris, colname = "Species", to = 1L)
}
\author{
EDG
}


================================================
FILE: man/df_nunique_perfeat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_df.R
\name{df_nunique_perfeat}
\alias{df_nunique_perfeat}
\title{Unique values per feature}
\usage{
df_nunique_perfeat(x, excludeNA = FALSE)
}
\arguments{
\item{x}{matrix or data frame input}

\item{excludeNA}{Logical: If TRUE, exclude NA values from unique count.}
}
\value{
Vector, integer of length \code{NCOL(x)} with number of unique
values per column/feature
}
\description{
Get number of unique values per features
}
\examples{
df_nunique_perfeat(iris)
}
\author{
EDG
}


================================================
FILE: man/dot-list_to_Hyperparameters.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{.list_to_Hyperparameters}
\alias{.list_to_Hyperparameters}
\title{Convert a list to a Hyperparameters object}
\usage{
.list_to_Hyperparameters(x)
}
\arguments{
\item{x}{Named list with two elements:
\describe{
\item{\code{algorithm}}{Character: algorithm name, e.g. \code{"GLM"}, \code{"RF"}.}
\item{\code{hyperparameters}}{Named list of hyperparameter name-value pairs
passed to the corresponding \verb{setup_<algorithm>()} function.}
}}
}
\value{
A \code{Hyperparameters} object as returned by \verb{setup_<algorithm>()}.
}
\description{
Internal function used by \code{rtemis.server} to reconstruct a \code{Hyperparameters}
object from a wire-format list. Not intended for direct use by end users.
}
\author{
EDG
}
\keyword{internal}


================================================
FILE: man/dot-list_to_ResamplerConfig.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/05_Resampler.R
\name{.list_to_ResamplerConfig}
\alias{.list_to_ResamplerConfig}
\title{Convert a list to a ResamplerConfig object}
\usage{
.list_to_ResamplerConfig(x)
}
\arguments{
\item{x}{Named list with the following elements:
\describe{
\item{\code{type}}{Character: resampler type — one of \code{"KFold"},
\code{"StratSub"}, \code{"StratBoot"}, \code{"Bootstrap"}, \code{"LOOCV"}, \code{"Custom"}.}
\item{\code{n}}{Integer: number of resamples (not used for \code{"LOOCV"}).}
\item{\code{train_p}}{Numeric: training proportion (used by \code{"StratSub"} and
\code{"StratBoot"}).}
\item{\code{stratify_var}}{Character or \code{NULL}: stratification variable name.}
\item{\code{strat_n_bins}}{Integer: number of bins for stratification.}
\item{\code{target_length}}{Integer or \code{NULL}: target resample length
(\code{"StratBoot"} only).}
\item{\code{id_strat}}{Character or \code{NULL}: ID stratification variable.}
\item{\code{seed}}{Integer or \code{NULL}: random seed.}
}}
}
\value{
A \code{ResamplerConfig} object of the appropriate subtype.
}
\description{
Internal function used by \code{rtemis.server} and \code{SuperConfig} deserialization
to reconstruct a \code{ResamplerConfig} object from a named list. Not intended
for direct use by end users.
}
\author{
EDG
}
\keyword{internal}


================================================
FILE: man/dot-list_to_TunerConfig.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/06_Tuner.R
\name{.list_to_TunerConfig}
\alias{.list_to_TunerConfig}
\title{Convert a list to a TunerConfig object}
\usage{
.list_to_TunerConfig(x)
}
\arguments{
\item{x}{Named list with two elements:
\describe{
\item{\code{type}}{Character: tuner type. Currently only \code{"GridSearch"} is
supported.}
\item{\code{config}}{Named list of tuner configuration fields. For
\code{"GridSearch"}: \code{resampler_config} (a list accepted by
\code{\link[=.list_to_ResamplerConfig]{.list_to_ResamplerConfig()}}), \code{search_type}, \code{randomize_p},
\code{metrics_aggregate_fn}, \code{metric}, and \code{maximize}.}
}}
}
\value{
A \code{TunerConfig} object (currently a \code{GridSearchConfig}).
}
\description{
Internal function used by \code{rtemis.server} and \code{SuperConfig} deserialization
to reconstruct a \code{TunerConfig} object from a named list. Not intended for
direct use by end users.
}
\author{
EDG
}
\keyword{internal}


================================================
FILE: man/draw_3Dscatter.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_3Dscatter.R
\name{draw_3Dscatter}
\alias{draw_3Dscatter}
\title{Interactive 3D Scatter Plots}
\usage{
draw_3Dscatter(
  x,
  y = NULL,
  z = NULL,
  fit = NULL,
  cluster = NULL,
  cluster_config = NULL,
  group = NULL,
  formula = NULL,
  rsq = TRUE,
  mode = "markers",
  order_on_x = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  zlab = NULL,
  alpha = 0.8,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  marker_col = NULL,
  marker_size = 8,
  fit_col = NULL,
  fit_alpha = 0.7,
  fit_lwd = 2.5,
  tick_font_size = 12,
  spike_col = NULL,
  legend = NULL,
  legend_xy = c(0, 1),
  legend_xanchor = "left",
  legend_yanchor = "auto",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  margin = list(t = 30, b = 0, l = 0, r = 0),
  fit_params = NULL,
  width = NULL,
  height = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  verbosity = 0L,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{Numeric, vector/data.frame/list: x-axis data.}

\item{y}{Numeric, vector/data.frame/list: y-axis data.}

\item{z}{Numeric, vector/data.frame/list: z-axis data.}

\item{fit}{Character: Fit method.}

\item{cluster}{Character: Clustering method.}

\item{cluster_config}{List: Config for clustering.}

\item{group}{Factor: Grouping variable.}

\item{formula}{Formula: Formula for non-linear least squares fit.}

\item{rsq}{Logical: If TRUE, print R-squared values in legend if \code{fit} is set.}

\item{mode}{Character, vector: "markers", "lines", "markers+lines".}

\item{order_on_x}{Logical: If TRUE, order \code{x} and \code{y} on \code{x}.}

\item{main}{Character: Main title.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{zlab}{Character: z-axis label.}

\item{alpha}{Numeric: Alpha for markers.}

\item{bg}{Background color.}

\item{plot_bg}{Plot background color.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{axes_square}{Logical: If TRUE, draw a square plot.}

\item{group_names}{Character: Names for groups.}

\item{font_size}{Numeric: Font size.}

\item{marker_col}{Color for markers.}

\item{marker_size}{Numeric: Marker size.}

\item{fit_col}{Color for fit line.}

\item{fit_alpha}{Numeric: Alpha for fit line.}

\item{fit_lwd}{Numeric: Line width for fit line.}

\item{tick_font_size}{Numeric: Tick font size.}

\item{spike_col}{Spike lines color.}

\item{legend}{Logical: If TRUE, draw legend.}

\item{legend_xy}{Numeric: Position of legend.}

\item{legend_xanchor}{Character: X anchor for legend.}

\item{legend_yanchor}{Character: Y anchor for legend.}

\item{legend_orientation}{Character: Orientation of legend.}

\item{legend_col}{Color for legend text.}

\item{legend_bg}{Color for legend background.}

\item{legend_border_col}{Color for legend border.}

\item{legend_borderwidth}{Numeric: Border width for legend.}

\item{legend_group_gap}{Numeric: Gap between legend groups.}

\item{margin}{Numeric, named list: Margins for top, bottom, left, right.}

\item{fit_params}{\code{Hyperparameters} for fit.}

\item{width}{Numeric: Width of plot.}

\item{height}{Numeric: Height of plot.}

\item{padding}{Numeric: Graph padding.}

\item{displayModeBar}{Logical: If TRUE, display mode bar.}

\item{modeBar_file_format}{Character: File format for mode bar.}

\item{verbosity}{Integer: Verbosity level.}

\item{filename}{Character: Filename to save plot.}

\item{file_width}{Numeric: Width of saved file.}

\item{file_height}{Numeric: Height of saved file.}

\item{file_scale}{Numeric: Scale of saved file.}
}
\value{
A \code{plotly} object.
}
\description{
Draw interactive 3D scatter plots using \code{plotly}.
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.

Note that \code{draw_3Dscatter} uses the theme's \code{plot_bg} as \code{grid_col}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
draw_3Dscatter(iris, group = iris$Species, theme = theme_darkgraygrid())
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_bar.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_bar.R
\name{draw_bar}
\alias{draw_bar}
\title{Interactive Barplots}
\usage{
draw_bar(
  x,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = 1,
  horizontal = FALSE,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  barmode = c("group", "relative", "stack", "overlay"),
  group_names = NULL,
  order_by_val = FALSE,
  ylim = NULL,
  hovernames = NULL,
  feature_names = NULL,
  font_size = 16,
  annotate = FALSE,
  annotate_col = theme[["labs_col"]],
  legend = NULL,
  legend_col = NULL,
  legend_xy = c(1, 1),
  legend_orientation = "v",
  legend_xanchor = "left",
  legend_yanchor = "auto",
  hline = NULL,
  hline_col = NULL,
  hline_width = 1,
  hline_dash = "solid",
  hline_annotate = NULL,
  hline_annotation_x = 1,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 0L
)
}
\arguments{
\item{x}{vector (possibly named), matrix, or data.frame: If matrix or
data.frame, rows are groups (can be 1 row), columns are features}

\item{main}{Character: Main plot title.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{alpha}{Float (0, 1]: Transparency for bar colors.}

\item{horizontal}{Logical: If TRUE, plot bars horizontally}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{barmode}{Character: Type of bar plot to make: "group", "relative",
"stack", "overlay". Default = "group". Use
"relative" for stacked bars, wich handles negative values correctly,
unlike "stack", as of writing.}

\item{group_names}{Character, vector, length = NROW(x): Group names.
Default = NULL, which uses \code{rownames(x)}}

\item{order_by_val}{Logical: If TRUE, order bars by increasing value.
Only use for single group data.}

\item{ylim}{Float, vector, length 2: y-axis limits.}

\item{hovernames}{Character, vector: Optional character vector to show on
hover over each bar.}

\item{feature_names}{Character, vector, length = NCOL(x): Feature names.
Default = NULL, which uses \code{colnames(x)}}

\item{font_size}{Float: Font size for all labels.}

\item{annotate}{Logical: If TRUE, annotate stacked bars}

\item{annotate_col}{Color for annotations}

\item{legend}{Logical: If TRUE, draw legend. Default = NULL, and will be
turned on if there is more than one feature present}

\item{legend_col}{Color: Legend text color. Default = NULL, determined by
theme}

\item{legend_xy}{Numeric, vector, length 2: x and y for plotly's legend}

\item{legend_orientation}{"v" or "h" for vertical or horizontal}

\item{legend_xanchor}{Character: Legend's x anchor: "left", "center",
"right", "auto"}

\item{legend_yanchor}{Character: Legend's y anchor: "top", "middle",
"bottom", "auto"}

\item{hline}{Float: If defined, draw a horizontal line at this y value.}

\item{hline_col}{Color for \code{hline}.}

\item{hline_width}{Float: Width for \code{hline}.}

\item{hline_dash}{Character: Type of line to draw: "solid", "dot", "dash",
"longdash", "dashdot",
or "longdashdot"}

\item{hline_annotate}{Character: Text of horizontal line annotation if
\code{hline} is set}

\item{hline_annotation_x}{Numeric: x position to place annotation with paper
as reference. 0: to the left of the plot area; 1: to the right of the plot area}

\item{margin}{Named list: plot margins.}

\item{automargin_x}{Logical: If TRUE, automatically set x-axis margins}

\item{automargin_y}{Logical: If TRUE, automatically set y-axis margins}

\item{padding}{Integer: N pixels to pad plot.}

\item{displayModeBar}{Logical: If TRUE, show plotly's modebar}

\item{modeBar_file_format}{Character: "svg", "png", "jpeg", "pdf" / any
output file type supported by plotly and your system}

\item{filename}{Character: Path to file to save static plot.}

\item{file_width}{Integer: File width in pixels for when \code{filename} is
set.}

\item{file_height}{Integer: File height in pixels for when \code{filename}
is set.}

\item{file_scale}{Numeric: If saving to file, scale plot by this number}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive barplots using \code{plotly}
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
draw_bar(VADeaths, legend_xy = c(0, 1))
draw_bar(VADeaths, legend_xy = c(1, 1), legend_xanchor = "left")
# simple individual bars
a <- c(4, 7, 2)
draw_bar(a)
# if input is a data.frame, each row is a group and each column is a feature
b <- data.frame(x = c(3, 5, 7), y = c(2, 1, 8), z = c(4, 5, 2))
rownames(b) <- c("Jen", "Ben", "Ren")
draw_bar(b)
# stacked
draw_bar(b, barmode = "stack")
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_box.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_box.R
\name{draw_box}
\alias{draw_box}
\title{Interactive Boxplots & Violin plots}
\usage{
draw_box(
  x,
  time = NULL,
  time_bin = c("year", "quarter", "month", "day"),
  type = c("box", "violin"),
  group = NULL,
  x_transform = c("none", "scale", "minmax"),
  main = NULL,
  xlab = "",
  ylab = NULL,
  alpha = 0.6,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  boxpoints = "outliers",
  quartilemethod = "linear",
  xlim = NULL,
  ylim = NULL,
  violin_box = TRUE,
  orientation = "v",
  annotate_n = FALSE,
  annotate_n_y = 1,
  annotate_mean = FALSE,
  annotate_meansd = FALSE,
  annotate_meansd_y = 1,
  annotate_col = theme[["labs_col"]],
  xnames = NULL,
  group_lines = FALSE,
  group_lines_dash = "dot",
  group_lines_col = NULL,
  group_lines_alpha = 0.5,
  labelify = TRUE,
  order_by_fn = NULL,
  font_size = 16,
  ylab_standoff = 18,
  legend = NULL,
  legend_col = NULL,
  legend_xy = NULL,
  legend_orientation = "v",
  legend_xanchor = "auto",
  legend_yanchor = "auto",
  xaxis_type = "category",
  cataxis_tickangle = "auto",
  margin = list(b = 65, l = 65, t = 50, r = 12, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  boxgroupgap = NULL,
  hovertext = NULL,
  show_n = FALSE,
  pvals = NULL,
  htest = "none",
  htest_compare = 0,
  htest_y = NULL,
  htest_annotate = TRUE,
  htest_annotate_x = 0,
  htest_annotate_y = -0.065,
  htest_star_col = theme[["labs_col"]],
  htest_bracket_col = theme[["labs_col"]],
  starbracket_pad = c(0.04, 0.05, 0.09),
  use_plotly_group = FALSE,
  width = NULL,
  height = NULL,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  mathjax = NULL
)
}
\arguments{
\item{x}{Vector or List of vectors: Input}

\item{time}{Date or date-time vector}

\item{time_bin}{Character: "year", "quarter", "month", or "day". Period to
bin by}

\item{type}{Character: "box" or "violin"}

\item{group}{Factor to group by}

\item{x_transform}{Character: "none", "scale", or "minmax" to use raw values,
scaled and centered values or min-max normalized to 0-1, respectively.
Transform is applied to each variable before grouping, so that groups are
comparable}

\item{main}{Character: Plot title.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{alpha}{Float (0, 1]: Transparency for box colors.}

\item{bg}{Color: Background color.}

\item{plot_bg}{Color: Background color for plot area.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{boxpoints}{Character or FALSE: "all", "suspectedoutliers", "outliers"
See \url{https://plotly.com/r/box-plots/#choosing-the-algorithm-for-computing-quartiles}}

\item{quartilemethod}{Character: "linear", "exclusive", "inclusive"}

\item{xlim}{Numeric vector: x-axis limits}

\item{ylim}{Numeric vector: y-axis limits}

\item{violin_box}{Logical: If TRUE and type is "violin" show box within
violin plot}

\item{orientation}{Character: "v" or "h" for vertical, horizontal}

\item{annotate_n}{Logical: If TRUE, annotate with N in each box}

\item{annotate_n_y}{Numeric: y position for \code{annotate_n}}

\item{annotate_mean}{Logical: If TRUE, annotate with mean of each box}

\item{annotate_meansd}{Logical: If TRUE, annotate with mean (SD) of each box}

\item{annotate_meansd_y}{Numeric: y position for \code{annotate_meansd}}

\item{annotate_col}{Color for annotations}

\item{xnames}{Character, vector, length = NROW(x): x-axis names. Default = NULL, which
tries to set names automatically.}

\item{group_lines}{Logical: If TRUE, add separating lines between groups of
boxplots}

\item{group_lines_dash}{Character: "solid", "dot", "dash", "longdash",
"dashdot", or "longdashdot"}

\item{group_lines_col}{Color for \code{group_lines}}

\item{group_lines_alpha}{Numeric: transparency for \code{group_lines_col}}

\item{labelify}{Logical: If TRUE, \link{labelify} x names}

\item{order_by_fn}{Function: If defined, order boxes by increasing value of
this function (e.g. median).}

\item{font_size}{Float: Font size for all labels.}

\item{ylab_standoff}{Numeric: Standoff for y-axis label}

\item{legend}{Logical: If TRUE, draw legend.}

\item{legend_col}{Color: Legend text color. Default = NULL, determined by
the theme.}

\item{legend_xy}{Float, vector, length 2: Relative x, y position for legend.}

\item{legend_orientation}{"v" or "h" for vertical, horizontal}

\item{legend_xanchor}{Character: Legend's x anchor: "left", "center",
"right", "auto"}

\item{legend_yanchor}{Character: Legend's y anchor: "top", "middle",
"bottom", "auto"}

\item{xaxis_type}{Character: "linear", "log", "date", "category",
"multicategory"}

\item{cataxis_tickangle}{Numeric: Angle for categorical axis tick labels}

\item{margin}{Named list: plot margins.}

\item{automargin_x}{Logical: If TRUE, automatically set x-axis margins}

\item{automargin_y}{Logical: If TRUE, automatically set y-axis margins}

\item{boxgroupgap}{Numeric: Sets the gap (in plot fraction) between boxes
of the same location coordinate}

\item{hovertext}{Character vector: Text to show on hover for each data point}

\item{show_n}{Logical: If TRUE, show N in each box}

\item{pvals}{Numeric vector: Precomputed p-values. Should correspond to each box.
Bypasses \code{htest} and \code{htest_compare}. Requires \code{group} to be set}

\item{htest}{Character: e.g. "t.test", "wilcox.test" to compare each box to
the \emph{first} box. If grouped, compare within each group to the first box.
If p-value of test is less than \code{htest.thresh}, add asterisk above/
to the side of each box}

\item{htest_compare}{Integer: 0: Compare all distributions against the first one;
2: Compare every second box to the one before it. Requires \code{group} to
be set}

\item{htest_y}{Numeric: y coordinate for \code{htest} annotation}

\item{htest_annotate}{Logical: if TRUE, include htest annotation}

\item{htest_annotate_x}{Numeric: x-axis paper coordinate for htest annotation}

\item{htest_annotate_y}{Numeric: y-axis paper coordinate for htest annotation}

\item{htest_star_col}{Color for htest annotation stars}

\item{htest_bracket_col}{Color for htest annotation brackets}

\item{starbracket_pad}{Numeric: Padding for htest annotation brackets}

\item{use_plotly_group}{If TRUE, use plotly's \code{group} arg to group
boxes.}

\item{width}{Numeric: Force plot size to this width. Default = NULL, i.e. fill
available space}

\item{height}{Numeric: Force plot size to this height. Default = NULL, i.e. fill
available space}

\item{displayModeBar}{Logical: If TRUE, show plotly's modebar}

\item{modeBar_file_format}{Character: "svg", "png", "jpeg", "pdf"}

\item{filename}{Character: Path to file to save static plot.}

\item{file_width}{Integer: File width in pixels for when \code{filename} is
set.}

\item{file_height}{Integer: File height in pixels for when \code{filename}
is set.}

\item{file_scale}{Numeric: If saving to file, scale plot by this number}

\item{mathjax}{Optional Character \{"local", "cdn"\}: Whether to use local or CDN version of
MathJax for rendering mathematical annotations.}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive boxplots or violin plots using \pkg{plotly}
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.

For multiple box plots, the recommendation is:
\itemize{
\item \code{x=dat[, columnindex]} for multiple variables of a data.frame
\item \code{x=list(a=..., b=..., etc.)} for multiple variables of potentially
different length
\item \code{x=split(var, group)} for one variable with multiple groups: group names
appear below boxplots
\item \verb{x=dat[, columnindex], group = factor} for grouping multiple variables:
group names appear in legend
}

If \code{orientation == "h"}, \code{xlab} is applied to y-axis and vice versa.
Similarly, \code{x.axist.type} applies to y-axis - this defaults to
"category" and would not normally need changing.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# A.1 Box plot of 4 variables
draw_box(iris[, 1:4])
# A.2 Grouped Box plot
draw_box(iris[, 1:4], group = iris[["Species"]])
draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE)
# B. Boxplot binned by time periods
# Synthetic data with an instantenous shift in distributions
set.seed(2021)
dat1 <- data.frame(alpha = rnorm(200, 0), beta = rnorm(200, 2), gamma = rnorm(200, 3))
dat2 <- data.frame(alpha = rnorm(200, 5), beta = rnorm(200, 8), gamma = rnorm(200, -3))
x <- rbind(dat1, dat2)
startDate <- as.Date("2019-12-04")
endDate <- as.Date("2021-03-31")
time <- seq(startDate, endDate, length.out = 400)
draw_box(x[, 1], time, "year", ylab = "alpha")
draw_box(x, time, "year", legend.xy = c(0, 1))
draw_box(x, time, "quarter", legend.xy = c(0, 1))
draw_box(x, time, "month",
  legend.orientation = "h",
  legend.xy = c(0, 1),
  legend.yanchor = "bottom"
)
# (Note how the boxplots widen when the period includes data from both dat1 and dat2)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_calibration.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_calibration.R
\name{draw_calibration}
\alias{draw_calibration}
\title{Draw calibration plot}
\usage{
draw_calibration(
  true_labels,
  predicted_prob,
  n_bins = 10L,
  bin_method = c("quantile", "equidistant"),
  binclasspos = 2L,
  main = NULL,
  subtitle = NULL,
  xlab = "Mean predicted probability",
  ylab = "Empirical risk",
  show_marginal_x = TRUE,
  marginal_x_y = -0.02,
  marginal_col = NULL,
  marginal_size = 10,
  mode = "markers+lines",
  show_brier = TRUE,
  theme = choose_theme(getOption("rtemis_theme")),
  filename = NULL,
  ...
)
}
\arguments{
\item{true_labels}{Factor or list of factors with true class labels}

\item{predicted_prob}{Numeric vector or list of numeric vectors with predicted probabilities}

\item{n_bins}{Integer: Number of windows to split the data into}

\item{bin_method}{Character: "quantile" or "equidistant": Method to bin the estimated
probabilities.}

\item{binclasspos}{Integer: Index of the positive class. The convention used in the package is
the second level is the positive class.}

\item{main}{Character: Main title}

\item{subtitle}{Character: Subtitle, placed bottom right of plot}

\item{xlab}{Character: x-axis label}

\item{ylab}{Character: y-axis label}

\item{show_marginal_x}{Logical: Add marginal plot of distribution of estimated probabilities}

\item{marginal_x_y}{Numeric: y position of marginal plot}

\item{marginal_col}{Character: Color of marginal plot}

\item{marginal_size}{Numeric: Size of marginal plot}

\item{mode}{Character: "lines", "markers", "lines+markers": How to plot.}

\item{show_brier}{Logical: If TRUE, add Brier scores to trace names.}

\item{theme}{\code{Theme} object.}

\item{filename}{Character: Path to save output.}

\item{...}{Additional arguments passed to \link{draw_scatter}}
}
\value{
\code{plotly} object.
}
\description{
Draw calibration plot
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Synthetic data with n cases
n <- 500L
true_labels <- factor(sample(c("A", "B"), n, replace = TRUE))
# Synthetic probabilities where A has mean 0.25 and B has mean 0.75
predicted_prob <- ifelse(true_labels == "A",
  rbeta(n, 2, 6),
  rbeta(n, 6, 2)
)
draw_calibration(true_labels, predicted_prob)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_confusion.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_confusion.R
\name{draw_confusion}
\alias{draw_confusion}
\title{Plot confusion matrix}
\usage{
draw_confusion(
  x,
  xlab = "Predicted",
  ylab = "Reference",
  true_col = "#43A4AC",
  false_col = "#FA9860",
  font_size = 18,
  main = NULL,
  main_y = 1,
  main_yanchor = "bottom",
  theme = choose_theme(getOption("rtemis_theme")),
  margin = list(l = 20, r = 5, b = 5, t = 20),
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{\code{ClassificationMetrics} object produced by \link{classification_metrics} or confusion matrix
where rows are the reference and columns are the estimated classes. For binary classification,
the first row and column are the positive class.}

\item{xlab}{Character: x-axis label. Default is "Predicted".}

\item{ylab}{Character: y-axis label. Default is "Reference".}

\item{true_col}{Color for true positives & true negatives.}

\item{false_col}{Color for false positives & false negatives.}

\item{font_size}{Integer: font size.}

\item{main}{Character: plot title.}

\item{main_y}{Numeric: y position of the title.}

\item{main_yanchor}{Character: y anchor of the title.}

\item{theme}{\code{Theme} object.}

\item{margin}{List: Plot margins.}

\item{filename}{Character: file name to save the plot. Default is NULL.}

\item{file_width}{Numeric: width of the file. Default is 500.}

\item{file_height}{Numeric: height of the file. Default is 500.}

\item{file_scale}{Numeric: scale of the file. Default is 1.}
}
\value{
\code{plotly} object.
}
\description{
Plot confusion matrix
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Assume positive class is "b"
true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a"))
predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)
metrics <- classification_metrics(true_labels, predicted_labels, predicted_prob)
draw_confusion(metrics)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_dist.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_dist.R
\name{draw_dist}
\alias{draw_dist}
\title{Draw Distributions using Histograms and Density Plots}
\usage{
draw_dist(
  x,
  type = c("density", "histogram"),
  mode = c("overlap", "ridge"),
  group = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  col = NULL,
  alpha = 0.75,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  font_alpha = 0.8,
  legend = NULL,
  legend_xy = c(0, 1),
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  bargap = 0.05,
  vline = NULL,
  vline_col = theme[["fg"]],
  vline_width = 1,
  vline_dash = "dot",
  text = NULL,
  text_x = 1,
  text_xref = "paper",
  text_xanchor = "left",
  text_y = 1,
  text_yref = "paper",
  text_yanchor = "top",
  text_col = theme[["fg"]],
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  automargin_x = TRUE,
  automargin_y = TRUE,
  zerolines = FALSE,
  density_kernel = "gaussian",
  density_bw = "SJ",
  histnorm = c("", "density", "percent", "probability", "probability density"),
  histfunc = c("count", "sum", "avg", "min", "max"),
  hist_n_bins = 20,
  barmode = "overlay",
  ridge_sharex = TRUE,
  ridge_y_labs = FALSE,
  ridge_order_on_mean = TRUE,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  width = NULL,
  height = NULL,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{Numeric vector / data.frame / list: Input. If not a vector, each column / each element is drawn.}

\item{type}{Character: "density" or "histogram".}

\item{mode}{Character: "overlap", "ridge". How to plot different groups; on the same axes ("overlap"), or on separate plots with the same x-axis ("ridge").}

\item{group}{Vector: Will be converted to factor; levels define group members.}

\item{main}{Character: Main title for the plot.}

\item{xlab}{Character: Label for the x-axis.}

\item{ylab}{Character: Label for the y-axis.}

\item{col}{Color: Colors for the plot.}

\item{alpha}{Numeric: Alpha transparency for plot elements.}

\item{plot_bg}{Color: Background color for plot area.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character: Color palette to use.}

\item{axes_square}{Logical: If TRUE, draw a square plot to fill the graphic device. Default = FALSE.}

\item{group_names}{Character: Names for the groups.}

\item{font_size}{Numeric: Font size for plot text.}

\item{font_alpha}{Numeric: Alpha transparency for font.}

\item{legend}{Logical: If TRUE, draw legend. Default = NULL, which will be set to TRUE if x is a list of more than 1 element.}

\item{legend_xy}{Numeric, vector, length 2: Relative x, y position for legend. Default = c(0, 1).}

\item{legend_col}{Color: Color for the legend text.}

\item{legend_bg}{Color: Background color for legend.}

\item{legend_border_col}{Color: Border color for legend.}

\item{bargap}{Numeric: The gap between adjacent histogram bars in plot fraction.}

\item{vline}{Numeric, vector: If defined, draw a vertical line at this x value(s).}

\item{vline_col}{Color: Color for \code{vline}.}

\item{vline_width}{Numeric: Width for \code{vline}.}

\item{vline_dash}{Character: Type of line to draw: "solid", "dot", "dash", "longdash", "dashdot", or "longdashdot".}

\item{text}{Character: If defined, add this text over the plot.}

\item{text_x}{Numeric: x-coordinate for \code{text}.}

\item{text_xref}{Character: "x": \code{text_x} refers to plot's x-axis; "paper": \code{text_x} refers to plotting area from 0-1.}

\item{text_xanchor}{Character: "auto", "left", "center", "right".}

\item{text_y}{Numeric: y-coordinate for \code{text}.}

\item{text_yref}{Character: "y": \code{text_y} refers to plot's y-axis; "paper": \code{text_y} refers to plotting area from 0-1.}

\item{text_yanchor}{Character: "auto", "top", "middle", "bottom".}

\item{text_col}{Color: Color for \code{text}.}

\item{margin}{List: Margins for the plot.}

\item{automargin_x}{Logical: If TRUE, automatically adjust x-axis margins.}

\item{automargin_y}{Logical: If TRUE, automatically adjust y-axis margins.}

\item{zerolines}{Logical: If TRUE, draw lines at y = 0.}

\item{density_kernel}{Character: Kernel to use for density estimation.}

\item{density_bw}{Character: Bandwidth to use for density estimation.}

\item{histnorm}{Character: NULL, "percent", "probability", "density", "probability density".}

\item{histfunc}{Character: "count", "sum", "avg", "min", "max".}

\item{hist_n_bins}{Integer: Number of bins to use if type = "histogram".}

\item{barmode}{Character: Barmode for histogram. One of "overlay", "stack", "relative", "group".}

\item{ridge_sharex}{Logical: If TRUE, draw single x-axis when \code{mode = "ridge"}.}

\item{ridge_y_labs}{Logical: If TRUE, show individual y labels when \code{mode = "ridge"}.}

\item{ridge_order_on_mean}{Logical: If TRUE, order groups by mean value when \code{mode = "ridge"}.}

\item{displayModeBar}{Logical: If TRUE, display the mode bar.}

\item{modeBar_file_format}{Character: File format for mode bar. Default = "svg".}

\item{width}{Numeric: Force plot size to this width. Default = NULL, i.e. fill available space.}

\item{height}{Numeric: Force plot size to this height. Default = NULL, i.e. fill available space.}

\item{filename}{Character: Path to file to save static plot.}

\item{file_width}{Integer: File width in pixels for when \code{filename} is set.}

\item{file_height}{Integer: File height in pixels for when \code{filename} is set.}

\item{file_scale}{Numeric: If saving to file, scale plot by this number.}
}
\value{
\code{plotly} object.
}
\description{
Draw Distributions using Histograms and Density Plots using \code{plotly}.
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.

If input is data.frame, non-numeric variables will be removed.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Will automatically use only numeric columns
draw_dist(iris)
draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]])
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_fit.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_scatter.R
\name{draw_fit}
\alias{draw_fit}
\title{True vs. Predicted Plot}
\usage{
draw_fit(
  x,
  y,
  xlab = "True",
  ylab = "Predicted",
  fit = "glm",
  se_fit = TRUE,
  axes_square = TRUE,
  axes_equal = TRUE,
  diagonal = TRUE,
  ...
)
}
\arguments{
\item{x}{Numeric, vector/data.frame/list: True values. If y is NULL and
\code{NCOL(x) > 1}, first two columns used as \code{x} and \code{y}, respectively}

\item{y}{Numeric, vector/data.frame/list: Predicted values}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{fit}{Character: Fit method.}

\item{se_fit}{Logical: If TRUE, include standard error of the fit.}

\item{axes_square}{Logical: If TRUE, draw a square plot.}

\item{axes_equal}{Logical: If TRUE, set equal scaling for axes.}

\item{diagonal}{Logical: If TRUE, add diagonal line.}

\item{...}{Additional arguments passed to \link{draw_scatter}}
}
\value{
\code{plotly} object.
}
\description{
A \code{draw_scatter} wrapper for plotting true vs. predicted values
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
x <- rnorm(500)
y <- x + rnorm(500)
draw_fit(x, y)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_graphD3.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_graphd3.R
\name{draw_graphD3}
\alias{draw_graphD3}
\title{Plot graph using \pkg{networkD3}}
\usage{
draw_graphD3(
  net,
  groups = NULL,
  color_scale = NULL,
  edge_col = NULL,
  node_col = NULL,
  node_alpha = 0.5,
  edge_alpha = 0.33,
  zoom = TRUE,
  legend = FALSE,
  palette = get_palette(getOption("rtemis_palette")),
  theme = choose_theme(getOption("rtemis_theme")),
  ...
)
}
\arguments{
\item{net}{\pkg{igraph} network.}

\item{groups}{Vector, length n nodes indicating group/cluster/community membership of nodes in \code{net}.}

\item{color_scale}{D3 colorscale (e.g. \code{networkD3::JS("d3.scaleOrdinal(d3.schemeCategory20b);")}).}

\item{edge_col}{Color for edges.}

\item{node_col}{Color for nodes.}

\item{node_alpha}{Float [0, 1]: Node opacity.}

\item{edge_alpha}{Float [0, 1]: Edge opacity.}

\item{zoom}{Logical: If TRUE, graph is zoomable.}

\item{legend}{Logical: If TRUE, display legend for groups.}

\item{palette}{Character vector: Colors to use.}

\item{theme}{\code{Theme} object.}

\item{...}{Additional arguments to pass to \code{networkD3}.}
}
\value{
\code{forceNetwork} object.
}
\description{
Plot graph using \pkg{networkD3}
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
library(igraph)
g <- make_ring(10)
draw_graphD3(g)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_graphjs.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_graphjs.R
\name{draw_graphjs}
\alias{draw_graphjs}
\title{Plot network using \pkg{threejs::graphjs}}
\usage{
draw_graphjs(
  net,
  vertex_size = 1,
  vertex_col = NULL,
  vertex_label_col = NULL,
  vertex_label_alpha = 0.66,
  vertex_frame_col = NA,
  vertex_label = NULL,
  vertex_shape = "circle",
  edge_col = NULL,
  edge_alpha = 0.5,
  edge_curved = 0.35,
  edge_width = 2,
  layout = c("fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama"),
  coords = NULL,
  layout_args = list(),
  cluster = NULL,
  groups = NULL,
  cluster_config = list(),
  cluster_mark_groups = TRUE,
  cluster_color_vertices = FALSE,
  main = "",
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  mar = rep(0, 4),
  filename = NULL,
  verbosity = 1L,
  ...
)
}
\arguments{
\item{net}{\pkg{igraph} network.}

\item{vertex_size}{Numeric: Vertex size.}

\item{vertex_col}{Color for vertices.}

\item{vertex_label_col}{Color for vertex labels.}

\item{vertex_label_alpha}{Numeric: Transparency for \code{vertex_label_col}.}

\item{vertex_frame_col}{Color for vertex border (frame).}

\item{vertex_label}{Character vector: Vertex labels. Default = NULL, which will keep existing names in \code{net} if any. Set to NA to avoid printing vertex labels.}

\item{vertex_shape}{Character, vector, length 1 or N nodes: Vertex shape. See \code{graphjs("vertex_shape")}.}

\item{edge_col}{Color for edges.}

\item{edge_alpha}{Numeric: Transparency for edges.}

\item{edge_curved}{Numeric: Curvature of edges.}

\item{edge_width}{Numeric: Edge thickness.}

\item{layout}{Character: one of: "fr", "dh", "drl", "gem", "graphopt", "kk", "lgl", "mds", "sugiyama", corresponding to all the available layouts in \pkg{igraph}.}

\item{coords}{Output of precomputed \pkg{igraph} layout. If provided, \code{layout} is ignored.}

\item{layout_args}{List of arguments to pass to \code{layout} function.}

\item{cluster}{Character: one of: "edge_betweenness", "fast_greedy", "infomap", "label_prop", "leading_eigen", "louvain", "optimal", "spinglass", "walktrap", corresponding to all the available \pkg{igraph} clustering functions.}

\item{groups}{Output of precomputed \pkg{igraph} clustering. If provided, \code{cluster} is ignored.}

\item{cluster_config}{List of arguments to pass to \code{cluster} function.}

\item{cluster_mark_groups}{Logical: If TRUE, draw polygons to indicate clusters, if \code{groups} or \code{cluster} are defined.}

\item{cluster_color_vertices}{Logical: If TRUE, color vertices by cluster membership.}

\item{main}{Character: Main title.}

\item{theme}{\code{Theme} object.}

\item{palette}{Color vector or name of rtemis palette.}

\item{mar}{Numeric vector, length 4: \code{par}'s margin argument.}

\item{filename}{Character: If provided, save plot to this filepath.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Extra arguments to pass to \code{igraph::plot.igraph()}.}
}
\value{
\code{scatterplotThree} object.
}
\description{
Interactive plotting of an \pkg{igraph} net using \pkg{threejs}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
library(igraph)
g <- make_ring(10)
draw_graphjs(g)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_heatmap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_heatmap.R
\name{draw_heatmap}
\alias{draw_heatmap}
\title{Interactive Heatmaps}
\usage{
draw_heatmap(
  x,
  Rowv = TRUE,
  Colv = TRUE,
  cluster = FALSE,
  symm = FALSE,
  cellnote = NULL,
  colorgrad_n = 101,
  colors = NULL,
  space = "rgb",
  lo = "#18A3AC",
  lomid = NULL,
  mid = NULL,
  midhi = NULL,
  hi = "#F48024",
  k_row = 1,
  k_col = 1,
  grid_gap = 0,
  limits = NULL,
  margins = NULL,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  key_title = NULL,
  showticklabels = NULL,
  colorbar_len = 0.7,
  plot_method = "plotly",
  theme = choose_theme(getOption("rtemis_theme")),
  row_side_colors = NULL,
  row_side_palette = NULL,
  col_side_colors = NULL,
  col_side_palette = NULL,
  font_size = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
)
}
\arguments{
\item{x}{Input matrix.}

\item{Rowv}{Logical or dendrogram. If Logical: Compute dendrogram and reorder rows. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at \code{heatmaply::heatmaply("Rowv")}.}

\item{Colv}{Logical or dendrogram. If Logical: Compute dendrogram and reorder columns. Defaults to FALSE. If dendrogram: use as is, without reordering. See more at \code{heatmaply::heatmaply("Colv")}.}

\item{cluster}{Logical: If TRUE, set \code{Rowv} and \code{Colv} to TRUE.}

\item{symm}{Logical: If TRUE, treat \code{x} symmetrically - \code{x} must be a square matrix.}

\item{cellnote}{Matrix with values to be displayed on hover. Defaults to \code{ddSci(x)}.}

\item{colorgrad_n}{Integer: Number of colors in gradient. Default = 101.}

\item{colors}{Character vector: Colors to use in gradient.}

\item{space}{Character: Color space to use. Default = "rgb".}

\item{lo}{Character: Color for low values. Default = "#18A3AC".}

\item{lomid}{Character: Color for low-mid values.}

\item{mid}{Character: Color for mid values.}

\item{midhi}{Character: Color for mid-high values.}

\item{hi}{Character: Color for high values. Default = "#F48024".}

\item{k_row}{Integer: Number of desired number of groups by which to color dendrogram branches in the rows. Default = 1.}

\item{k_col}{Integer: Number of desired number of groups by which to color dendrogram branches in the columns. Default = 1.}

\item{grid_gap}{Integer: Space between cells. Default = 0 (no space).}

\item{limits}{Float, length 2: Determine color range. Default = NULL, which automatically centers values around 0.}

\item{margins}{Float, length 4: Heatmap margins.}

\item{main}{Character: Main title.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{key_title}{Character: Title for the color key.}

\item{showticklabels}{Logical: If TRUE, show tick labels.}

\item{colorbar_len}{Numeric: Length of the colorbar.}

\item{plot_method}{Character: Plot method to use. Default = "plotly".}

\item{theme}{\code{Theme} object.}

\item{row_side_colors}{Data frame: Column names will be label names, cells should be label colors. See \code{heatmaply::heatmaply("row_side_colors")}.}

\item{row_side_palette}{Color palette function. See \code{heatmaply::heatmaply("row_side_palette")}.}

\item{col_side_colors}{Data frame: Column names will be label names, cells should be label colors. See \code{heatmaply::heatmaply("col_side_colors")}.}

\item{col_side_palette}{Color palette function. See \code{heatmaply::heatmaply("col_side_palette")}.}

\item{font_size}{Numeric: Font size.}

\item{padding}{Numeric: Padding between cells.}

\item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.}

\item{modeBar_file_format}{Character: File format for image exports from the mode bar.}

\item{filename}{Character: File name to save the plot.}

\item{file_width}{Numeric: Width of exported image.}

\item{file_height}{Numeric: Height of exported image.}

\item{file_scale}{Numeric: Scale of exported image.}

\item{...}{Additional arguments to be passed to \code{heatmaply::heatmaply}.}
}
\value{
\code{plotly} object.`
}
\description{
Draw interactive heatmaps using \code{heatmaply}.
}
\details{
See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.
'heatmaply' unfortunately forces loading of the 'colorspace' namespace.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
x <- rnormmat(200, 20)
xcor <- cor(x)
draw_heatmap(xcor)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_leaflet.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_leaflet.R
\name{draw_leaflet}
\alias{draw_leaflet}
\title{Plot interactive choropleth map using \pkg{leaflet}}
\usage{
draw_leaflet(
  fips,
  values,
  names = NULL,
  fillOpacity = 1,
  color_mapping = c("Numeric", "Bin"),
  col_lo = "#0290EE",
  col_hi = "#FE4AA3",
  col_na = "#303030",
  col_highlight = "#FE8A4F",
  col_interpolate = c("linear", "spline"),
  col_bins = 21,
  domain = NULL,
  weight = 0.5,
  color = "black",
  alpha = 1,
  bg_tile_provider = leaflet::providers[["CartoDB.Positron"]],
  bg_tile_alpha = 0.67,
  fg_tile_provider = leaflet::providers[["CartoDB.PositronOnlyLabels"]],
  legend_position = c("topright", "bottomright", "bottomleft", "topleft"),
  legend_alpha = 0.8,
  legend_title = NULL,
  init_lng = -98.5418083333333,
  init_lat = 39.2074138888889,
  init_zoom = 3,
  stroke = TRUE
)
}
\arguments{
\item{fips}{Character vector: FIPS codes. (If numeric, it will be appropriately zero-padded).}

\item{values}{Values to map to \code{fips}.}

\item{names}{Character vector: Optional county names to appear on hover along \code{values}.}

\item{fillOpacity}{Float: Opacity for fill colors.}

\item{color_mapping}{Character: "Numeric" or "Bin".}

\item{col_lo}{Overlay color mapped to lowest value.}

\item{col_hi}{Overlay color mapped to highest value.}

\item{col_na}{Color mapped to NA values.}

\item{col_highlight}{Hover border color.}

\item{col_interpolate}{Character: "linear" or "spline".}

\item{col_bins}{Integer: Number of color bins to create if \code{color_mapping = "Bin"}.}

\item{domain}{Limits for mapping colors to values. Default = NULL and set to range.}

\item{weight}{Float: Weight of county border lines.}

\item{color}{Color of county border lines.}

\item{alpha}{Float: Overlay transparency.}

\item{bg_tile_provider}{Background tile (below overlay colors), one of \code{leaflet::providers}.}

\item{bg_tile_alpha}{Float: Background tile transparency.}

\item{fg_tile_provider}{Foreground tile (above overlay colors), one of \code{leaflet::providers}.}

\item{legend_position}{Character: One of: "topright", "bottomright", "bottomleft", "topleft".}

\item{legend_alpha}{Float: Legend box transparency.}

\item{legend_title}{Character: Defaults to name of \code{values} variable.}

\item{init_lng}{Float: Center map around this longitude (in decimal form). Default = -98.54180833333334 (US geographic center).}

\item{init_lat}{Float: Center map around this latitude (in decimal form). Default = 39.207413888888894 (US geographic center).}

\item{init_zoom}{Integer: Initial zoom level (depends on device, i.e. window, size).}

\item{stroke}{Logical: If TRUE, draw polygon borders.}
}
\value{
\code{leaflet} object.
}
\description{
Plot interactive choropleth map using \pkg{leaflet}
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
fips <- c(06075, 42101)
population <- c(874961, 1579000)
names <- c("SF", "Philly")
draw_leaflet(fips, population, names)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_pie.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_pie.R
\name{draw_pie}
\alias{draw_pie}
\title{Interactive Pie Chart}
\usage{
draw_pie(
  x,
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = 0.8,
  bg = NULL,
  plot_bg = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  category_names = NULL,
  textinfo = "label+percent",
  font_size = 16,
  labs_col = NULL,
  legend = TRUE,
  legend_col = NULL,
  sep_col = NULL,
  margin = list(b = 50, l = 50, t = 50, r = 20),
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{data.frame: Input: Either a) 1 numeric column with categories defined by rownames, or
b) two columns, the first is category names, the second numeric or c) a numeric vector with categories defined using
the \code{category.names} argument.}

\item{main}{Character: Plot title. Default = NULL, which results in \code{colnames(x)[1]}.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{alpha}{Numeric: Alpha for the pie slices.}

\item{bg}{Character: Background color.}

\item{plot_bg}{Character: Plot background color.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{category_names}{Character, vector, length = NROW(x): Category names. Default = NULL, which uses
either \code{rownames(x)}, or the first column of \code{x} if \code{ncol(x) = 2}.}

\item{textinfo}{Character: Info to show over each slice: "label", "percent", "label+percent".}

\item{font_size}{Integer: Font size for labels.}

\item{labs_col}{Character: Color of labels.}

\item{legend}{Logical: If TRUE, show legend.}

\item{legend_col}{Character: Color for legend.}

\item{sep_col}{Character: Separator color.}

\item{margin}{List: Margin settings.}

\item{padding}{Numeric: Padding between cells.}

\item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.}

\item{modeBar_file_format}{Character: File format for image exports from the mode bar.}

\item{filename}{Character: File name to save plot.}

\item{file_width}{Integer: Width for saved file.}

\item{file_height}{Integer: Height for saved file.}

\item{file_scale}{Numeric: Scale for saved file.}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive pie charts using \code{plotly}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
draw_pie(VADeaths[, 1, drop = FALSE])
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_protein.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_protein.R
\name{draw_protein}
\alias{draw_protein}
\title{Plot an amino acid sequence with annotations}
\usage{
draw_protein(
  x,
  site = NULL,
  region = NULL,
  ptm = NULL,
  cleavage_site = NULL,
  variant = NULL,
  disease_variants = NULL,
  n_per_row = NULL,
  main = NULL,
  main_xy = c(0.055, 0.975),
  main_xref = "paper",
  main_yref = "paper",
  main_xanchor = "middle",
  main_yanchor = "top",
  layout = c("simple", "grid", "1curve", "2curve"),
  show_markers = TRUE,
  show_labels = TRUE,
  font_size = 18,
  label_col = NULL,
  scatter_mode = "markers+lines",
  marker_size = 28,
  marker_col = NULL,
  marker_alpha = 1,
  marker_symbol = "circle",
  line_col = NULL,
  line_alpha = 1,
  line_width = 2,
  show_full_names = TRUE,
  region_scatter_mode = "markers+lines",
  region_style = 3,
  region_marker_size = marker_size,
  region_marker_alpha = 0.6,
  region_marker_symbol = "circle",
  region_line_dash = "solid",
  region_line_shape = "line",
  region_line_smoothing = 1,
  region_line_width = 1,
  region_line_alpha = 0.6,
  theme = choose_theme(getOption("rtemis_theme")),
  region_palette = getOption("rtemis_palette", "rtms"),
  region_outline_only = FALSE,
  region_outline_pad = 2,
  region_pad = 0.35,
  region_fill_alpha = 0.1666666,
  region_fill_shape = "line",
  region_fill_smoothing = 1,
  bpadcx = 0.5,
  bpadcy = 0.5,
  site_marker_size = marker_size,
  site_marker_symbol = marker_symbol,
  site_marker_alpha = 1,
  site_border_width = 1.5,
  site_palette = getOption("rtemis_palette", "rtms"),
  variant_col = "#FA6E1E",
  disease_variant_col = "#E266AE",
  showlegend_ptm = TRUE,
  ptm_col = NULL,
  ptm_symbol = "circle",
  ptm_offset = 0.12,
  ptm_pad = 0.35,
  ptm_marker_size = marker_size/4.5,
  clv_col = NULL,
  clv_symbol = "triangle-down",
  clv_offset = 0.12,
  clv_pad = 0.35,
  clv_marker_size = marker_size/4,
  annotate_position_every = 10,
  annotate_position_alpha = 0.5,
  annotate_position_ay = -0.4 * marker_size,
  position_font_size = font_size - 6,
  legend_xy = c(0.97, 0.954),
  legend_xanchor = "left",
  legend_yanchor = "top",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  margin = list(b = 0, l = 0, t = 0, r = 0, pad = 0),
  showgrid_x = FALSE,
  showgrid_y = FALSE,
  automargin_x = TRUE,
  automargin_y = TRUE,
  xaxis_autorange = TRUE,
  yaxis_autorange = "reversed",
  scaleanchor_y = "x",
  scaleratio_y = 1,
  hoverlabel_align = "left",
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  filename = NULL,
  file_width = 1320,
  file_height = 990,
  file_scale = 1,
  width = NULL,
  height = NULL,
  verbosity = 1L
)
}
\arguments{
\item{x}{Character vector: amino acid sequence (1-letter abbreviations) OR
\code{a3} object OR Character: path to JSON file OR Character: UniProt accession number.}

\item{site}{Named list of lists with indices of sites. These will be
highlighted by coloring the border of markers.}

\item{region}{Named list of lists with indices of regions. These will be
highlighted by coloring the markers and lines of regions using the
\code{palette} colors.}

\item{ptm}{List of post-translational modifications.}

\item{cleavage_site}{List of cleavage sites.}

\item{variant}{List of variant information.}

\item{disease_variants}{List of disease variant information.}

\item{n_per_row}{Integer: Number of amino acids to show per row.}

\item{main}{Character: Main title.}

\item{main_xy}{Numeric vector, length 2: x and y coordinates for title.
e.g. if \code{main_xref} and \code{main_yref} are \code{"paper"}:
\code{c(0.055, .975)} is top left, \code{c(.5, .975)} is top and
middle.}

\item{main_xref}{Character: xref for title.}

\item{main_yref}{Character: yref for title.}

\item{main_xanchor}{Character: xanchor for title.}

\item{main_yanchor}{Character: yanchor for title.}

\item{layout}{Character: "1curve", "grid": type of layout to use.}

\item{show_markers}{Logical: If TRUE, show amino acid markers.}

\item{show_labels}{Logical: If TRUE, annotate amino acids with elements.}

\item{font_size}{Integer: Font size for labels.}

\item{label_col}{Color for labels.}

\item{scatter_mode}{Character: Mode for scatter plot.}

\item{marker_size}{Integer: Size of markers.}

\item{marker_col}{Color for markers.}

\item{marker_alpha}{Numeric: Alpha for markers.}

\item{marker_symbol}{Character: Symbol for markers.}

\item{line_col}{Color for lines.}

\item{line_alpha}{Numeric: Alpha for lines.}

\item{line_width}{Numeric: Width for lines.}

\item{show_full_names}{Logical: If TRUE, show full names of amino acids.}

\item{region_scatter_mode}{Character: Mode for scatter plot.}

\item{region_style}{Integer: Style for regions.}

\item{region_marker_size}{Integer: Size of region markers.}

\item{region_marker_alpha}{Numeric: Alpha for region markers.}

\item{region_marker_symbol}{Character: Symbol for region markers.}

\item{region_line_dash}{Character: Dash for region lines.}

\item{region_line_shape}{Character: Shape for region lines.}

\item{region_line_smoothing}{Numeric: Smoothing for region lines.}

\item{region_line_width}{Numeric: Width for region lines.}

\item{region_line_alpha}{Numeric: Alpha for region lines.}

\item{theme}{\code{Theme} object.}

\item{region_palette}{Named list of colors for regions.}

\item{region_outline_only}{Logical: If TRUE, only show outline of regions.}

\item{region_outline_pad}{Numeric: Padding for region outline.}

\item{region_pad}{Numeric: Padding for region.}

\item{region_fill_alpha}{Numeric: Alpha for region fill.}

\item{region_fill_shape}{Character: Shape for region fill.}

\item{region_fill_smoothing}{Numeric: Smoothing for region fill.}

\item{bpadcx}{Numeric: Padding for region border.}

\item{bpadcy}{Numeric: Padding for region border.}

\item{site_marker_size}{Integer: Size of site markers.}

\item{site_marker_symbol}{Character: Symbol for site markers.}

\item{site_marker_alpha}{Numeric: Alpha for site markers.}

\item{site_border_width}{Numeric: Width for site borders.}

\item{site_palette}{Named list of colors for sites.}

\item{variant_col}{Color for variants.}

\item{disease_variant_col}{Color for disease variants.}

\item{showlegend_ptm}{Logical: If TRUE, show legend for PTMs.}

\item{ptm_col}{Named list of colors for PTMs.}

\item{ptm_symbol}{Character: Symbol for PTMs.}

\item{ptm_offset}{Numeric: Offset for PTMs.}

\item{ptm_pad}{Numeric: Padding for PTMs.}

\item{ptm_marker_size}{Integer: Size of PTM markers.}

\item{clv_col}{Color for cleavage site annotations.}

\item{clv_symbol}{Character: Symbol for cleavage site annotations.}

\item{clv_offset}{Numeric: Offset for cleavage site annotations.}

\item{clv_pad}{Numeric: Padding for cleavage site annotations.}

\item{clv_marker_size}{Integer: Size of cleavage site annotation markers.}

\item{annotate_position_every}{Integer: Annotate every nth position.}

\item{annotate_position_alpha}{Numeric: Alpha for position annotations.}

\item{annotate_position_ay}{Numeric: Y offset for position annotations.}

\item{position_font_size}{Integer: Font size for position annotations.}

\item{legend_xy}{Numeric vector, length 2: x and y coordinates for legend.}

\item{legend_xanchor}{Character: xanchor for legend.}

\item{legend_yanchor}{Character: yanchor for legend.}

\item{legend_orientation}{Character: Orientation for legend.}

\item{legend_col}{Color for legend.}

\item{legend_bg}{Color for legend background.}

\item{legend_border_col}{Color for legend border.}

\item{legend_borderwidth}{Numeric: Width for legend border.}

\item{legend_group_gap}{Numeric: Gap between legend groups.}

\item{margin}{List: Margin settings.}

\item{showgrid_x}{Logical: If TRUE, show x grid.}

\item{showgrid_y}{Logical: If TRUE, show y grid.}

\item{automargin_x}{Logical: If TRUE, use automatic margin for x axis.}

\item{automargin_y}{Logical: If TRUE, use automatic margin for y axis.}

\item{xaxis_autorange}{Logical: If TRUE, use automatic range for x axis.}

\item{yaxis_autorange}{Character: If TRUE, use automatic range for y axis.}

\item{scaleanchor_y}{Character: Scale anchor for y axis.}

\item{scaleratio_y}{Numeric: Scale ratio for y axis.}

\item{hoverlabel_align}{Character: Alignment for hover label.}

\item{displayModeBar}{Logical: If TRUE, display mode bar.}

\item{modeBar_file_format}{Character: File format for mode bar.}

\item{scrollZoom}{Logical: If TRUE, enable scroll zoom.}

\item{filename}{Character: File name to save plot.}

\item{file_width}{Integer: Width for saved file.}

\item{file_height}{Integer: Height for saved file.}

\item{file_scale}{Numeric: Scale for saved file.}

\item{width}{Integer: Width for plot.}

\item{height}{Integer: Height for plot.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{plotly} object.
}
\description{
Plot an amino acid sequence with multiple site and/or region annotations.
}
\examples{
\dontrun{
  # Reads sequence from UniProt server
  tau <- seqinr::read.fasta("https://rest.uniprot.org/uniprotkb/P10636.fasta",
    seqtype = "AA"
  )
  draw_protein(as.character(tau[[1]]))

  # or directly using the UniProt accession number:
  draw_protein("P10636")
}
}
\author{
EDG
}


================================================
FILE: man/draw_pvals.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_pvals.R
\name{draw_pvals}
\alias{draw_pvals}
\title{Barplot p-values using \link{draw_bar}}
\usage{
draw_pvals(
  x,
  xnames = NULL,
  yname = NULL,
  p_adjust_method = "none",
  pval_hline = 0.05,
  hline_col = rt_red,
  hline_dash = "dash",
  ...
)
}
\arguments{
\item{x}{Float, vector: p-values.}

\item{xnames}{Character, vector: feature names.}

\item{yname}{Character: outcome name.}

\item{p_adjust_method}{Character: method for \link{p.adjust}.}

\item{pval_hline}{Float: Significance level at which to plot horizontal line.}

\item{hline_col}{Color for \code{pval_hline}.}

\item{hline_dash}{Character: type of line to draw.}

\item{...}{Additional arguments passed to \link{draw_bar}.}
}
\value{
\code{plotly} object.
}
\description{
Plot 1 - p-values as a barplot
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
draw_pvals(c(0.01, 0.02, 0.03), xnames = c("Feature1", "Feature2", "Feature3"))
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_roc.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_roc.R
\name{draw_roc}
\alias{draw_roc}
\title{Draw ROC curve}
\usage{
draw_roc(
  true_labels,
  predicted_prob,
  multiclass_fill_labels = TRUE,
  main = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  legend = TRUE,
  legend_title = "Group (AUC)",
  legend_xy = c(1, 0),
  legend_xanchor = "right",
  legend_yanchor = "bottom",
  auc_dp = 3L,
  xlim = c(-0.05, 1.05),
  ylim = c(-0.05, 1.05),
  diagonal = TRUE,
  diagonal_col = NULL,
  axes_square = TRUE,
  filename = NULL,
  ...
)
}
\arguments{
\item{true_labels}{Factor: True outcome labels.}

\item{predicted_prob}{Numeric vector [0, 1]: Predicted probabilities for the positive class (i.e. second level of outcome).
Or, for multiclass, a matrix of predicted probabilities with one column per class.
Or, a list of such vectors/matrices to draw multiple ROC curves on the same plot.}

\item{multiclass_fill_labels}{Logical: If TRUE, fill in labels for multiclass ROC curves.
If FALSE, column names of \code{predicted_prob} must match levels of \code{true_labels}.}

\item{main}{Character: Main title for the plot.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{legend}{Logical: If TRUE, draw legend.}

\item{legend_title}{Character: Title for the legend.}

\item{legend_xy}{Numeric vector: Position of the legend in the form c(x, y).}

\item{legend_xanchor}{Character: X anchor for the legend.}

\item{legend_yanchor}{Character: Y anchor for the legend.}

\item{auc_dp}{Integer: Number of decimal places for AUC values.}

\item{xlim}{Numeric vector: Limits for the x-axis.}

\item{ylim}{Numeric vector: Limits for the y-axis.}

\item{diagonal}{Logical: If TRUE, draw diagonal line.}

\item{diagonal_col}{Character: Color for the diagonal line.}

\item{axes_square}{Logical: If TRUE, make axes square.}

\item{filename}{Character: If provided, save the plot to this file.}

\item{...}{Additional arguments passed to \link{draw_scatter}.}
}
\value{
\code{plotly} object.
}
\description{
Draw ROC curve
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Binary classification
true_labels <- factor(c("A", "B", "A", "A", "B", "A", "B", "B", "A", "B"))
predicted_prob <- c(0.1, 0.4, 0.35, 0.8, 0.65, 0.2, 0.9, 0.55, 0.3, 0.7)
draw_roc(true_labels, predicted_prob)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_scatter.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_scatter.R
\name{draw_scatter}
\alias{draw_scatter}
\title{Interactive Scatter Plots}
\usage{
draw_scatter(
  x,
  y = NULL,
  fit = NULL,
  se_fit = FALSE,
  se_times = 1.96,
  include_fit_name = TRUE,
  cluster = NULL,
  cluster_config = list(k = 2),
  group = NULL,
  rsq = TRUE,
  mode = "markers",
  order_on_x = NULL,
  main = NULL,
  subtitle = NULL,
  xlab = NULL,
  ylab = NULL,
  alpha = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  axes_square = FALSE,
  group_names = NULL,
  font_size = 16,
  marker_col = NULL,
  marker_size = 8,
  symbol = "circle",
  fit_col = NULL,
  fit_alpha = 0.8,
  fit_lwd = 2.5,
  line_shape = "linear",
  se_col = NULL,
  se_alpha = 0.4,
  scatter_type = "scatter",
  show_marginal_x = FALSE,
  show_marginal_y = FALSE,
  marginal_x = x,
  marginal_y = y,
  marginal_x_y = NULL,
  marginal_y_x = NULL,
  marginal_col = NULL,
  marginal_alpha = 0.333,
  marginal_size = 10,
  legend = NULL,
  legend_title = NULL,
  legend_trace = TRUE,
  legend_xy = c(0, 0.98),
  legend_xanchor = "left",
  legend_yanchor = "auto",
  legend_orientation = "v",
  legend_col = NULL,
  legend_bg = "#FFFFFF00",
  legend_border_col = "#FFFFFF00",
  legend_borderwidth = 0,
  legend_group_gap = 0,
  x_showspikes = FALSE,
  y_showspikes = FALSE,
  spikedash = "solid",
  spikemode = "across",
  spikesnap = "hovered data",
  spikecolor = NULL,
  spikethickness = 1,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  main_y = 1.01,
  main_yanchor = "bottom",
  subtitle_x = 0.02,
  subtitle_y = 0.99,
  subtitle_xref = "paper",
  subtitle_yref = "paper",
  subtitle_xanchor = "left",
  subtitle_yanchor = "top",
  automargin_x = TRUE,
  automargin_y = TRUE,
  xlim = NULL,
  ylim = NULL,
  axes_equal = FALSE,
  diagonal = FALSE,
  diagonal_col = NULL,
  diagonal_dash = "dot",
  diagonal_alpha = 0.66,
  fit_params = NULL,
  vline = NULL,
  vline_col = theme[["fg"]],
  vline_width = 1,
  vline_dash = "dot",
  hline = NULL,
  hline_col = theme[["fg"]],
  hline_width = 1,
  hline_dash = "dot",
  hovertext = NULL,
  width = NULL,
  height = NULL,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 0L
)
}
\arguments{
\item{x}{Numeric, vector/data.frame/list: x-axis data. If y is NULL and \code{NCOL(x) > 1}, first two columns used as \code{x} and \code{y}, respectively.}

\item{y}{Numeric, vector/data.frame/list: y-axis data.}

\item{fit}{Character: Fit method.}

\item{se_fit}{Logical: If TRUE, include standard error of the fit.}

\item{se_times}{Numeric: Multiplier for standard error.}

\item{include_fit_name}{Logical: If TRUE, include fit name in legend.}

\item{cluster}{Character: Clustering method.}

\item{cluster_config}{List: Config for clustering.}

\item{group}{Factor: Grouping variable.}

\item{rsq}{Logical: If TRUE, print R-squared values in legend if \code{fit} is set.}

\item{mode}{Character, vector: "markers", "lines", "markers+lines".}

\item{order_on_x}{Logical: If TRUE, order \code{x} and \code{y} on \code{x}.}

\item{main}{Character: Main title.}

\item{subtitle}{Character: Subtitle.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{alpha}{Numeric: Alpha for markers.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to use.}

\item{axes_square}{Logical: If TRUE, draw a square plot.}

\item{group_names}{Character: Names for groups.}

\item{font_size}{Numeric: Font size.}

\item{marker_col}{Color for markers.}

\item{marker_size}{Numeric: Marker size.}

\item{symbol}{Character: Marker symbol.}

\item{fit_col}{Color for fit line.}

\item{fit_alpha}{Numeric: Alpha for fit line.}

\item{fit_lwd}{Numeric: Line width for fit line.}

\item{line_shape}{Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv".}

\item{se_col}{Color for standard error band.}

\item{se_alpha}{Numeric: Alpha for standard error band.}

\item{scatter_type}{Character: Scatter plot type.}

\item{show_marginal_x}{Logical: If TRUE, add marginal distribution line markers on x-axis.}

\item{show_marginal_y}{Logical: If TRUE, add marginal distribution line markers on y-axis.}

\item{marginal_x}{Numeric: Data for marginal distribution on x-axis.}

\item{marginal_y}{Numeric: Data for marginal distribution on y-axis.}

\item{marginal_x_y}{Numeric: Y position of marginal markers on x-axis.}

\item{marginal_y_x}{Numeric: X position of marginal markers on y-axis.}

\item{marginal_col}{Color for marginal markers.}

\item{marginal_alpha}{Numeric: Alpha for marginal markers.}

\item{marginal_size}{Numeric: Size of marginal markers.}

\item{legend}{Logical: If TRUE, draw legend.}

\item{legend_title}{Character: Title for legend.}

\item{legend_trace}{Logical: If TRUE, draw legend trace. (For when you have \code{fit} and don't want a trace for the markers.)}

\item{legend_xy}{Numeric: Position of legend.}

\item{legend_xanchor}{Character: X anchor for legend.}

\item{legend_yanchor}{Character: Y anchor for legend.}

\item{legend_orientation}{Character: Orientation of legend.}

\item{legend_col}{Color for legend text.}

\item{legend_bg}{Color for legend background.}

\item{legend_border_col}{Color for legend border.}

\item{legend_borderwidth}{Numeric: Border width for legend.}

\item{legend_group_gap}{Numeric: Gap between legend groups.}

\item{x_showspikes}{Logical: If TRUE, show spikes on x-axis.}

\item{y_showspikes}{Logical: If TRUE, show spikes on y-axis.}

\item{spikedash}{Character: Dash type for spikes.}

\item{spikemode}{Character: Spike mode.}

\item{spikesnap}{Character: Spike snap mode.}

\item{spikecolor}{Color for spikes.}

\item{spikethickness}{Numeric: Thickness of spikes.}

\item{margin}{List: Plot margins.}

\item{main_y}{Numeric: Y position of main title.}

\item{main_yanchor}{Character: Y anchor for main title.}

\item{subtitle_x}{Numeric: X position of subtitle.}

\item{subtitle_y}{Numeric: Y position of subtitle.}

\item{subtitle_xref}{Character: X reference for subtitle.}

\item{subtitle_yref}{Character: Y reference for subtitle.}

\item{subtitle_xanchor}{Character: X anchor for subtitle.}

\item{subtitle_yanchor}{Character: Y anchor for subtitle.}

\item{automargin_x}{Logical: If TRUE, automatically adjust x-axis margins.}

\item{automargin_y}{Logical: If TRUE, automatically adjust y-axis margins.}

\item{xlim}{Numeric: Limits for x-axis.}

\item{ylim}{Numeric: Limits for y-axis.}

\item{axes_equal}{Logical: If TRUE, set equal scaling for axes.}

\item{diagonal}{Logical: If TRUE, add diagonal line.}

\item{diagonal_col}{Color for diagonal line.}

\item{diagonal_dash}{Character: "solid", "dash", "dot", "dashdot", "longdash", "longdashdot". Dash type for diagonal line.}

\item{diagonal_alpha}{Numeric: Alpha for diagonal line.}

\item{fit_params}{\code{Hyperparameters} for fit.}

\item{vline}{Numeric: X position for vertical line.}

\item{vline_col}{Color for vertical line.}

\item{vline_width}{Numeric: Width for vertical line.}

\item{vline_dash}{Character: Dash type for vertical line.}

\item{hline}{Numeric: Y position for horizontal line.}

\item{hline_col}{Color for horizontal line.}

\item{hline_width}{Numeric: Width for horizontal line.}

\item{hline_dash}{Character: Dash type for horizontal line.}

\item{hovertext}{List: Hover text for markers.}

\item{width}{Numeric: Width of plot.}

\item{height}{Numeric: Height of plot.}

\item{displayModeBar}{Logical: If TRUE, display mode bar.}

\item{modeBar_file_format}{Character: File format for mode bar.}

\item{scrollZoom}{Logical: If TRUE, enable scroll zoom.}

\item{filename}{Character: Filename to save plot.}

\item{file_width}{Numeric: Width of saved file.}

\item{file_height}{Numeric: Height of saved file.}

\item{file_scale}{Numeric: Scale of saved file.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive scatter plots using \code{plotly}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
draw_scatter(iris$Sepal.Length, iris$Petal.Length,
  fit = "gam", se_fit = TRUE, group = iris$Species
)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_spectrogram.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_spectrogram.R
\name{draw_spectrogram}
\alias{draw_spectrogram}
\title{Interactive Spectrogram}
\usage{
draw_spectrogram(
  x,
  y,
  z,
  colorgrad_n = 101,
  colors = NULL,
  xlab = "Time",
  ylab = "Frequency",
  zlab = "Power",
  hover_xlab = xlab,
  hover_ylab = ylab,
  hover_zlab = zlab,
  zmin = NULL,
  zmax = NULL,
  zauto = TRUE,
  hoverlabel_align = "right",
  colorscale = "Jet",
  colorbar_y = 0.5,
  colorbar_yanchor = "middle",
  colorbar_xpad = 0,
  colorbar_ypad = 0,
  colorbar_len = 0.75,
  colorbar_title_side = "bottom",
  showgrid = FALSE,
  space = "rgb",
  lo = "#18A3AC",
  lomid = NULL,
  mid = NULL,
  midhi = NULL,
  hi = "#F48024",
  grid_gap = 0,
  limits = NULL,
  main = NULL,
  key_title = NULL,
  showticklabels = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  font_size = NULL,
  padding = 0,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
)
}
\arguments{
\item{x}{Numeric: Time.}

\item{y}{Numeric: Frequency.}

\item{z}{Numeric: Power.}

\item{colorgrad_n}{Integer: Number of colors in the gradient.}

\item{colors}{Character: Custom colors for the gradient.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{zlab}{Character: z-axis label.}

\item{hover_xlab}{Character: x-axis label for hover.}

\item{hover_ylab}{Character: y-axis label for hover.}

\item{hover_zlab}{Character: z-axis label for hover.}

\item{zmin}{Numeric: Minimum value for color scale.}

\item{zmax}{Numeric: Maximum value for color scale.}

\item{zauto}{Logical: If TRUE, automatically set zmin and zmax.}

\item{hoverlabel_align}{Character: Alignment of hover labels.}

\item{colorscale}{Character: Color scale.}

\item{colorbar_y}{Numeric: Y position of colorbar.}

\item{colorbar_yanchor}{Character: Y anchor of colorbar.}

\item{colorbar_xpad}{Numeric: X padding of colorbar.}

\item{colorbar_ypad}{Numeric: Y padding of colorbar.}

\item{colorbar_len}{Numeric: Length of colorbar.}

\item{colorbar_title_side}{Character: Side of colorbar title.}

\item{showgrid}{Logical: If TRUE, show grid.}

\item{space}{Character: Color space for gradient.}

\item{lo}{Character: Low color for gradient.}

\item{lomid}{Character: Low-mid color for gradient.}

\item{mid}{Character: Mid color for gradient.}

\item{midhi}{Character: Mid-high color for gradient.}

\item{hi}{Character: High color for gradient.}

\item{grid_gap}{Integer: Space between cells.}

\item{limits}{Numeric, length 2: Determine color range. Default = NULL, which automatically centers values around 0.}

\item{main}{Character: Main title.}

\item{key_title}{Character: Title of the key.}

\item{showticklabels}{Logical: If TRUE, show tick labels.}

\item{theme}{\code{Theme} object.}

\item{font_size}{Numeric: Font size.}

\item{padding}{Numeric: Padding between cells.}

\item{displayModeBar}{Logical: If TRUE, display the plotly mode bar.}

\item{modeBar_file_format}{Character: File format for image exports from the mode bar.}

\item{filename}{Character: Filename to save the plot. Default is NULL.}

\item{file_width}{Numeric: Width of exported image.}

\item{file_height}{Numeric: Height of exported image.}

\item{file_scale}{Numeric: Scale of exported image.}

\item{...}{Additional arguments to be passed to \code{heatmaply::heatmaply}.}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive spectrograms using \code{plotly}
}
\details{
To set custom colors, use a minimum of \code{lo} and \code{hi}, optionally also
\code{lomid}, \code{mid}, \code{midhi} colors and set \code{colorscale = NULL}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Example data
time <- seq(0, 10, length.out = 100)
freq <- seq(1, 100, length.out = 100)
power <- outer(time, freq, function(t, f) sin(t) * cos(f))
draw_spectrogram(
  x = time,
  y = freq,
  z = power
)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_survfit.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_survfit.R
\name{draw_survfit}
\alias{draw_survfit}
\title{Draw a survfit object}
\usage{
draw_survfit(
  x,
  mode = "lines",
  symbol = "cross",
  line_shape = "hv",
  xlim = NULL,
  ylim = NULL,
  xlab = "Time",
  ylab = "Survival",
  main = NULL,
  legend_xy = c(1, 1),
  legend_xanchor = "right",
  legend_yanchor = "top",
  theme = choose_theme(getOption("rtemis_theme")),
  nrisk_table = FALSE,
  filename = NULL,
  ...
)
}
\arguments{
\item{x}{\code{survfit} object created by \link[survival:survfit]{survival::survfit}.}

\item{mode}{Character, vector: "markers", "lines", "markers+lines".}

\item{symbol}{Character: Symbol to use for the points.}

\item{line_shape}{Character: Line shape for line plots. Options: "linear", "hv", "vh", "hvh", "vhv".}

\item{xlim}{Numeric vector of length 2: x-axis limits.}

\item{ylim}{Numeric vector of length 2: y-axis limits.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{main}{Character: Main title.}

\item{legend_xy}{Numeric: Position of legend.}

\item{legend_xanchor}{Character: X anchor for legend.}

\item{legend_yanchor}{Character: Y anchor for legend.}

\item{theme}{\code{Theme} object.}

\item{nrisk_table}{Logical: If \code{TRUE}, subplot a table of the number at risk at each time point.}

\item{filename}{Character: Filename to save plot.}

\item{...}{Additional arguments passed to \link{draw_scatter}.}
}
\value{
\code{plotly} object.
}
\description{
Draw a \code{survfit} object using \link{draw_scatter}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# Get the lung dataset
data(cancer, package = "survival")
sf1 <- survival::survfit(survival::Surv(time, status) ~ 1, data = lung)
draw_survfit(sf1)
sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung)
draw_survfit(sf2)
# with N at risk table
draw_survfit(sf2)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_table.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_table.R
\name{draw_table}
\alias{draw_table}
\title{Simple HTML table}
\usage{
draw_table(
  x,
  .ddSci = TRUE,
  main = NULL,
  main_col = "black",
  main_x = 0,
  main_xanchor = "auto",
  fill_col = "#18A3AC",
  table_bg = "white",
  bg = "white",
  line_col = "white",
  lwd = 1,
  header_font_col = "white",
  table_font_col = "gray20",
  font_size = 14,
  font_family = "Helvetica Neue",
  margin = list(l = 0, r = 5, t = 30, b = 0, pad = 0)
)
}
\arguments{
\item{x}{data.frame: Table to draw}

\item{.ddSci}{Logical: If TRUE, apply \link{ddSci} to numeric columns.}

\item{main}{Character: Table tile.}

\item{main_col}{Color: Title color.}

\item{main_x}{Float [0, 1]: Align title: 0: left, .5: center, 1: right.}

\item{main_xanchor}{Character: "auto", "left", "right": plotly's layout xanchor for
title.}

\item{fill_col}{Color: Used to fill header with column names and first column with
row names.}

\item{table_bg}{Color: Table background.}

\item{bg}{Color: Background.}

\item{line_col}{Color: Line color.}

\item{lwd}{Float: Line width.}

\item{header_font_col}{Color: Header font color.}

\item{table_font_col}{Color: Table font color.}

\item{font_size}{Integer: Font size.}

\item{font_family}{Character: Font family.}

\item{margin}{List: plotly's margins.}
}
\value{
\code{plotly} object.
}
\description{
Draw an html table using \code{plotly}
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
 df <- data.frame(
   Name = c("Alice", "Bob", "Charlie"),
   Age = c(25, 30, 35),
   Score = c(90.5, 85.0, 88.0)
)
p <- draw_table(
  df,
  main = "Sample Table",
  main_col = "#00b2b2"
)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_ts.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_ts.R
\name{draw_ts}
\alias{draw_ts}
\title{Interactive Timeseries Plots}
\usage{
draw_ts(
  x,
  time,
  window = 7L,
  group = NULL,
  roll_fn = c("mean", "median", "max", "none"),
  roll_col = NULL,
  roll_alpha = 1,
  roll_lwd = 2,
  roll_name = NULL,
  alpha = NULL,
  align = "center",
  group_names = NULL,
  xlab = "Time",
  n_xticks = 12,
  scatter_type = "scatter",
  legend = TRUE,
  x_showspikes = TRUE,
  y_showspikes = FALSE,
  spikedash = "solid",
  spikemode = "across",
  spikesnap = "hovered data",
  spikecolor = NULL,
  spikethickness = 1,
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  theme = choose_theme(getOption("rtemis_theme")),
  palette = getOption("rtemis_palette", "rtms"),
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  ...
)
}
\arguments{
\item{x}{Numeric vector of values to plot or list of vectors}

\item{time}{Numeric or Date vector of time corresponding to values of \code{x}}

\item{window}{Integer: apply \code{roll_fn} over this many units of time}

\item{group}{Factor defining groups}

\item{roll_fn}{Character: "mean", "median", "max", or "sum": Function to apply on
rolling windows of \code{x}}

\item{roll_col}{Color for rolling line}

\item{roll_alpha}{Numeric: transparency for rolling line}

\item{roll_lwd}{Numeric: width of rolling line}

\item{roll_name}{Rolling function name (for annotation)}

\item{alpha}{Numeric [0, 1]: Transparency}

\item{align}{Character: "center", "right", or "left"}

\item{group_names}{Character vector of group names}

\item{xlab}{Character: x-axis label}

\item{n_xticks}{Integer: number of x-axis ticks to use (approximately)}

\item{scatter_type}{Character: "scatter" or "lines"}

\item{legend}{Logical: If TRUE, show legend}

\item{x_showspikes}{Logical: If TRUE, show x-axis spikes on hover}

\item{y_showspikes}{Logical: If TRUE, show y-axis spikes on hover}

\item{spikedash}{Character: dash type string ("solid", "dot", "dash",
"longdash", "dashdot", or "longdashdot") or a dash length list in px
(eg "5px,10px,2px,2px")}

\item{spikemode}{Character: If "toaxis", spike line is drawn from the data
point to the axis the series is plotted on. If "across", the line is drawn
across the entire plot area, and supercedes "toaxis". If "marker", then a
marker dot is drawn on the axis the series is plotted on}

\item{spikesnap}{Character: "data", "cursor", "hovered data". Determines
whether spikelines are stuck to the cursor or to the closest datapoints.}

\item{spikecolor}{Color for spike lines}

\item{spikethickness}{Numeric: spike line thickness}

\item{displayModeBar}{Logical: If TRUE, display plotly's modebar}

\item{modeBar_file_format}{Character: modeBar image export file format}

\item{theme}{\code{Theme} object.}

\item{palette}{Character: palette name, or list of colors}

\item{filename}{Character: Path to filename to save plot}

\item{file_width}{Numeric: image export width}

\item{file_height}{Numeric: image export height}

\item{file_scale}{Numeric: image export scale}

\item{...}{Additional arguments to be passed to \link{draw_scatter}}
}
\value{
\code{plotly} object.
}
\description{
Draw interactive timeseries plots using \code{plotly}
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
time <- sample(seq(as.Date("2020-03-01"), as.Date("2020-09-23"), length.out = 140))
x1 <- rnorm(140)
x2 <- rnorm(140, 1, 1.2)
# Single timeseries
draw_ts(x1, time)
# Multiple timeseries input as list
draw_ts(list(Alpha = x1, Beta = x2), time)
# Multiple timeseries grouped by group, different lengths
time1 <- sample(seq(as.Date("2020-03-01"), as.Date("2020-07-23"), length.out = 100))
time2 <- sample(seq(as.Date("2020-05-01"), as.Date("2020-09-23"), length.out = 140))
time <- c(time1, time2)
x <- c(rnorm(100), rnorm(140, 1, 1.5))
group <- c(rep("Alpha", 100), rep("Beta", 140))
draw_ts(x, time, 7, group)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_varimp.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_varimp.R
\name{draw_varimp}
\alias{draw_varimp}
\title{Interactive Variable Importance Plot}
\usage{
draw_varimp(
  x,
  names = NULL,
  main = NULL,
  type = c("bar", "line"),
  xlab = NULL,
  ylab = NULL,
  plot_top = 1,
  orientation = "v",
  line_width = 12,
  labelify = TRUE,
  alpha = 1,
  palette = get_palette(getOption("rtemis_palette")),
  mar = NULL,
  font_size = 16,
  axis_font_size = 14,
  theme = choose_theme(getOption("rtemis_theme")),
  showlegend = TRUE,
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{Numeric vector (or coercible to numeric): Input.}

\item{names}{Vector, string: Names of features.}

\item{main}{Character: Main title.}

\item{type}{Character: "bar" or "line".}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{plot_top}{Integer: Plot this many top features.}

\item{orientation}{Character: "h" or "v".}

\item{line_width}{Numeric: Line width.}

\item{labelify}{Logical: If TRUE, labelify feature names.}

\item{alpha}{Numeric: Transparency.}

\item{palette}{Character vector: Colors to use.}

\item{mar}{Vector, numeric, length 4: Plot margins in pixels (NOT inches).}

\item{font_size}{Integer: Overall font size to use (essentially for the
title at this point).}

\item{axis_font_size}{Integer: Font size to use for axis labels and tick labels.}

\item{theme}{\code{Theme} object.}

\item{showlegend}{Logical: If TRUE, show legend.}

\item{filename}{Character: Path to save the plot image.}

\item{file_width}{Numeric: Width of the saved plot image.}

\item{file_height}{Numeric: Height of the saved plot image.}

\item{file_scale}{Numeric: Scale of the saved plot image.}
}
\value{
\code{plotly} object.
}
\description{
Plot variable importance using \code{plotly}
}
\details{
A simple \code{plotly} wrapper to plot horizontal barplots, sorted by value,
which can be used to visualize variable importance, model coefficients, etc.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# synthetic data
x <- rnorm(10)
names(x) <- paste0("Feature_", seq(x))
draw_varimp(x)
draw_varimp(x, orientation = "h")
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_volcano.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_volcano.R
\name{draw_volcano}
\alias{draw_volcano}
\title{Volcano Plot}
\usage{
draw_volcano(
  x,
  pvals,
  xnames = NULL,
  group = NULL,
  x_thresh = 0,
  p_thresh = 0.05,
  p_adjust_method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
    "none"),
  p_transform = function(x) -log10(x),
  legend = NULL,
  legend_lo = NULL,
  legend_hi = NULL,
  label_lo = "Low",
  label_hi = "High",
  main = NULL,
  xlab = NULL,
  ylab = NULL,
  margin = list(b = 65, l = 65, t = 50, r = 10, pad = 0),
  xlim = NULL,
  ylim = NULL,
  alpha = NULL,
  hline = NULL,
  hline_col = NULL,
  hline_width = 1,
  hline_dash = "solid",
  hline_annotate = NULL,
  hline_annotation_x = 1,
  theme = choose_theme(getOption("rtemis_theme")),
  annotate = TRUE,
  annotate_col = theme[["labs_col"]],
  font_size = 16,
  palette = NULL,
  legend_x_lo = NULL,
  legend_x_hi = NULL,
  legend_y = 0.97,
  annotate_n = 7L,
  ax_lo = NULL,
  ay_lo = NULL,
  ax_hi = NULL,
  ay_hi = NULL,
  annotate_alpha = 0.7,
  hovertext = NULL,
  displayModeBar = "hover",
  filename = NULL,
  file_width = 500,
  file_height = 500,
  file_scale = 1,
  verbosity = 1L,
  ...
)
}
\arguments{
\item{x}{Numeric vector: Input values, e.g. log2 fold change, coefficients, etc.}

\item{pvals}{Numeric vector: p-values.}

\item{xnames}{Character vector: \code{x} names.}

\item{group}{Optional factor: Used to color code points. If NULL, significant points
below \code{x_thresh}, non-significant points, and significant points
above \code{x_thresh} will be plotted with the first, second and third
color of \code{palette}.}

\item{x_thresh}{Numeric x-axis threshold separating low from high.}

\item{p_thresh}{Numeric: p-value threshold of significance.}

\item{p_adjust_method}{Character: p-value adjustment method.
"holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none".
Default = "holm". Use "none" for raw p-values.}

\item{p_transform}{function.}

\item{legend}{Logical: If TRUE, show legend. Will default to FALSE, if
\code{group = NULL}, otherwise to TRUE.}

\item{legend_lo}{Character: Legend to annotate significant points below the
\code{x_thresh}.}

\item{legend_hi}{Character: Legend to annotate significant points above the
\code{x_thresh}.}

\item{label_lo}{Character: label for low values.}

\item{label_hi}{Character: label for high values.}

\item{main}{Character: Main title.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{margin}{Named list of plot margins.
Default = \code{list(b = 65, l = 65, t = 50, r = 10, pad = 0)}.}

\item{xlim}{Numeric vector, length 2: x-axis limits.}

\item{ylim}{Numeric vector, length 2: y-axis limits.}

\item{alpha}{Numeric: point transparency.}

\item{hline}{Numeric: If defined, draw a horizontal line at this y value.}

\item{hline_col}{Color for \code{hline}.}

\item{hline_width}{Numeric: Width for \code{hline}.}

\item{hline_dash}{Character: Type of line to draw: "solid", "dot", "dash",
"longdash", "dashdot", or "longdashdot".}

\item{hline_annotate}{Character: Text of horizontal line annotation if
\code{hline} is set.}

\item{hline_annotation_x}{Numeric: x position to place annotation with paper
as reference. 0: to the left of the plot area; 1: to the right of the plot area.}

\item{theme}{\code{Theme} object.}

\item{annotate}{Logical: If TRUE, annotate significant points.}

\item{annotate_col}{Color for annotations.}

\item{font_size}{Integer: Font size.}

\item{palette}{Character vector: Colors to use. If \code{group} is NULL, the first, second and third
colors will be used for significant points with negative coefficients, non-significant points, and
significant points with positive coefficients, respectively. If \code{group} is not NULL, colors will
be assigned to groups, in order of appearance.}

\item{legend_x_lo}{Numeric: x position of \code{legend_lo}.}

\item{legend_x_hi}{Numeric: x position of \code{legend_hi}.}

\item{legend_y}{Numeric: y position for \code{legend_lo} and \code{legend_hi}.}

\item{annotate_n}{Integer: Number of significant points to annotate.}

\item{ax_lo}{Numeric: Sets the x component of the arrow tail about the arrow head for
significant points below \code{x_thresh}.}

\item{ay_lo}{Numeric: Sets the y component of the arrow tail about the arrow head for
significant points below \code{x_thresh}.}

\item{ax_hi}{Numeric: Sets the x component of the arrow tail about the arrow head for
significant points above \code{x_thresh}.}

\item{ay_hi}{Numeric: Sets the y component of the arrow tail about the arrow head for
significant points above \code{x_thresh}.}

\item{annotate_alpha}{Numeric: Transparency for annotations.}

\item{hovertext}{Character vector: Text to display on hover.}

\item{displayModeBar}{Logical: If TRUE, display plotly mode bar.}

\item{filename}{Character: Path to save the plot image.}

\item{file_width}{Numeric: Width of the saved plot image.}

\item{file_height}{Numeric: Height of the saved plot image.}

\item{file_scale}{Numeric: Scale of the saved plot image.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Additional arguments passed to \link{draw_scatter}.}
}
\value{
\code{plotly} object.
}
\description{
Volcano Plot
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
set.seed(2019)
y <- rnormmat(500, 500, return_df = TRUE)
x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500))
mod <- massGLM(x, y)
draw_volcano(summary(mod)[["Coefficient_x"]], summary(mod)[["p_value_x"]])
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/draw_xt.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/draw_xt.R
\name{draw_xt}
\alias{draw_xt}
\title{Plot timeseries data}
\usage{
draw_xt(
  x,
  y,
  x2 = NULL,
  y2 = NULL,
  which_xy = NULL,
  which_xy2 = NULL,
  shade_bin = NULL,
  shade_interval = NULL,
  shade_col = NULL,
  shade_x = NULL,
  shade_name = "",
  shade_showlegend = FALSE,
  ynames = NULL,
  y2names = NULL,
  xlab = NULL,
  ylab = NULL,
  y2lab = NULL,
  xunits = NULL,
  yunits = NULL,
  y2units = NULL,
  yunits_col = NULL,
  y2units_col = NULL,
  zt = NULL,
  show_zt = TRUE,
  show_zt_every = NULL,
  zt_nticks = 18L,
  main = NULL,
  main_y = 1,
  main_yanchor = "bottom",
  x_nticks = 0,
  y_nticks = 0,
  show_rangeslider = NULL,
  slider_start = NULL,
  slider_end = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  palette = get_palette(getOption("rtemis_palette")),
  font_size = 16,
  yfill = "none",
  y2fill = "none",
  fill_alpha = 0.2,
  yline_width = 2,
  y2line_width = 2,
  x_showspikes = TRUE,
  spike_dash = "solid",
  spike_col = NULL,
  x_spike_thickness = -2,
  tickfont_size = 16,
  x_tickmode = "auto",
  x_tickvals = NULL,
  x_ticktext = NULL,
  x_tickangle = NULL,
  legend_x = 0,
  legend_y = 1.1,
  legend_xanchor = "left",
  legend_yanchor = "top",
  legend_orientation = "h",
  margin = list(l = 75, r = 75, b = 75, t = 75),
  x_standoff = 20L,
  y_standoff = 20L,
  y2_standoff = 20L,
  hovermode = "x",
  displayModeBar = TRUE,
  modeBar_file_format = "svg",
  scrollZoom = TRUE,
  filename = NULL,
  file_width = 960,
  file_height = 500,
  file_scale = 1
)
}
\arguments{
\item{x}{Datetime vector or list of vectors.}

\item{y}{Numeric vector or named list of vectors: y-axis data.}

\item{x2}{Datetime vector or list of vectors, optional: must be provided if \code{y2} does not
correspond to values in \code{x}. A single x-axis will be drawn for all values in \code{x} and \code{x2}.}

\item{y2}{Numeric vector, optional: If provided, a second y-axis will be added to the right
side of the plot.}

\item{which_xy}{Integer vector: Indices of \code{x} and \code{y} to plot.
If not provided, will select up to the first two x-y traces.}

\item{which_xy2}{Integer vector: Indices of \code{x2} and \code{y2} to plot.
If not provided, will select up to the first two x2-y2 traces.}

\item{shade_bin}{Integer vector \{0, 1\}: Time points in \code{x} to shade on the plot. For example,
if there are 10 time points in \code{x}, and you want to shade time points 3 to 7,
\code{shade_bin = c(0, 0, 1, 1, 1, 1, 1, 0, 0, 0)}. Only set \code{shade_bin} or \code{shade_interval}, not
both.}

\item{shade_interval}{List of numeric vectors: Intervals to shade on the plot. Only set
\code{shade_bin} or \code{shade_interval}, not both.}

\item{shade_col}{Color: Color to shade intervals.}

\item{shade_x}{Numeric vector: x-values to use for shading.}

\item{shade_name}{Character: Name for shaded intervals.}

\item{shade_showlegend}{Logical: If TRUE, show legend for shaded intervals.}

\item{ynames}{Character vector, optional: Names for each vector in \code{y}.}

\item{y2names}{Character vector, optional: Names for each vector in \code{y2}.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{y2lab}{Character: y2-axis label.}

\item{xunits}{Character: x-axis units.}

\item{yunits}{Character: y-axis units.}

\item{y2units}{Character: y2-axis units.}

\item{yunits_col}{Color for y-axis units.}

\item{y2units_col}{Color for y2-axis units.}

\item{zt}{Numeric vector: Zeitgeber time. If provided, will be shown on the x-axis instead of
\code{x}. To be used only with a single \code{x} vector and no \code{x2}.}

\item{show_zt}{Logical: If TRUE, show zt on x-axis, if zt is provided.}

\item{show_zt_every}{Optional integer: Show zt every \code{show_zt_every} ticks. If NULL, will be
calculated to be \code{x_nticks} +/- 1 if \code{x_nticks} is not 0, otherwise 12 +/- 1.}

\item{zt_nticks}{Integer: Number of zt ticks to show. Only used if \code{show_zt_every} is NULL.
The actual number of ticks shown will depend on the periodicity of zt, so that zt = 0 is always
included.}

\item{main}{Character: Main title.}

\item{main_y}{Numeric: Y position of main title.}

\item{main_yanchor}{Character: "top", "middle", "bottom".}

\item{x_nticks}{Integer: Number of ticks on x-axis.}

\item{y_nticks}{Integer: Number of ticks on y-axis.}

\item{show_rangeslider}{Logical: If TRUE, show a range slider.}

\item{slider_start}{Numeric: Start of range slider.}

\item{slider_end}{Numeric: End of range slider.}

\item{theme}{\code{Theme} object.}

\item{palette}{Character vector: Colors to be used to draw each vector in \code{y} and \code{y2}, in order.}

\item{font_size}{Numeric: Font size for text.}

\item{yfill}{Character: Fill type for y-axis: "none", "tozeroy", "tonexty".}

\item{y2fill}{Character: Fill type for y2-axis: "none", "tozeroy", "tonexty".}

\item{fill_alpha}{Numeric: Fill opacity for y-axis.}

\item{yline_width}{Numeric: Line width for y-axis lines.}

\item{y2line_width}{Numeric: Line width for y2-axis lines.}

\item{x_showspikes}{Logical: If TRUE, show spikes on x-axis.}

\item{spike_dash}{Character: Dash type for spikes: "solid", "dot", "dash", "longdash",
"dashdot", "longdashdot".}

\item{spike_col}{Color for spikes.}

\item{x_spike_thickness}{Numeric: Thickness of spikes. \code{-2} avoids drawing border around spikes.}

\item{tickfont_size}{Numeric: Font size for tick labels.}

\item{x_tickmode}{Character: "auto", "linear", "array".}

\item{x_tickvals}{Numeric vector: Tick positions.}

\item{x_ticktext}{Character vector: Tick labels.}

\item{x_tickangle}{Numeric: Angle of tick labels.}

\item{legend_x}{Numeric: X position of legend.}

\item{legend_y}{Numeric: Y position of legend.}

\item{legend_xanchor}{Character: "left", "center", "right".}

\item{legend_yanchor}{Character: "top", "middle", "bottom".}

\item{legend_orientation}{Character: "v" for vertical, "h" for horizontal.}

\item{margin}{Named list with 4 numeric values: "l", "r", "t", "b" for left, right, top, bottom
margins.}

\item{x_standoff}{Numeric: Distance from x-axis to x-axis label.}

\item{y_standoff}{Numeric: Distance from y-axis to y-axis label.}

\item{y2_standoff}{Numeric: Distance from y2-axis to y2-axis label.}

\item{hovermode}{Character: "closest", "x", "x unified".}

\item{displayModeBar}{Logical: If TRUE, display plotly mode bar.}

\item{modeBar_file_format}{Character: "png", "svg", "jpeg", "webp", "pdf": file format for mode
bar image export.}

\item{scrollZoom}{Logical: If TRUE, enable zooming by scrolling.}

\item{filename}{Character: Path to save the plot image.}

\item{file_width}{Numeric: Width of the saved plot image.}

\item{file_height}{Numeric: Height of the saved plot image.}

\item{file_scale}{Numeric: Scale of the saved plot image.}
}
\value{
\code{plotly} object.
}
\description{
Plot timeseries data
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
datetime <- seq(
  as.POSIXct("2020-01-01 00:00"),
  as.POSIXct("2020-01-02 00:00"),
  by = "hour"
)
df <- data.frame(
  datetime = datetime,
  value1 = rnorm(length(datetime)),
  value2 = rnorm(length(datetime))
)
draw_xt(df, x = df[, 1], y = df[, 2:3])
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/dt_describe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_describe}
\alias{dt_describe}
\title{Describe data.table}
\usage{
dt_describe(x, verbosity = 1L)
}
\arguments{
\item{x}{data.table: Input data.table.}

\item{verbosity}{Integer: If > 0, print output to console.}
}
\value{
List with three data.tables: Numeric, Categorical, and Date.
}
\description{
Describe data.table
}
\examples{
library(data.table)
origin <- as.POSIXct("2022-01-01 00:00:00", tz = "America/Los_Angeles")
x <- data.table(
  ID = paste0("ID", 1:10),
  V1 = rnorm(10),
  V2 = rnorm(10, 20, 3),
  V1_datetime = as.POSIXct(
    seq(
      1, 1e7,
      length.out = 10
    ),
    origin = origin
  ),
  V2_datetime = as.POSIXct(
    seq(
      1, 1e7,
      length.out = 10
    ),
    origin = origin
  ),
  C1 = sample(c("alpha", "beta", "gamma"), 10, TRUE),
  F1 = factor(sample(c("delta", "epsilon", "zeta"), 10, TRUE))
)
}
\author{
EDG
}


================================================
FILE: man/dt_inspect_types.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_inspect_types}
\alias{dt_inspect_types}
\title{Inspect column types}
\usage{
dt_inspect_types(x, cols = NULL, verbosity = 1L)
}
\arguments{
\item{x}{data.table: Input data.table.}

\item{cols}{Character vector: columns to inspect.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
Character vector.
}
\description{
Will attempt to identify columns that should be numeric but are either character or
factor by running \link{inspect_type} on each column.
}
\examples{
library(data.table)
x <- data.table(
  id = 8001:8006,
  a = c("3", "5", "undefined", "21", "4", NA),
  b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
  c = c(1, 2, 3, 4, 5, 6)
)
dt_inspect_types(x)
}
\author{
EDG
}


================================================
FILE: man/dt_keybin_reshape.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_keybin_reshape}
\alias{dt_keybin_reshape}
\title{Long to wide key-value reshaping}
\usage{
dt_keybin_reshape(
  x,
  id_name,
  key_name,
  positive = 1,
  negative = 0,
  xname = NULL,
  verbosity = 1L
)
}
\arguments{
\item{x}{\code{data.table} object.}

\item{id_name}{Character: Name of column in \code{x} that defines the IDs
identifying individual rows.}

\item{key_name}{Character: Name of column in \code{x} that holds the key.}

\item{positive}{Numeric or Character: Used to fill id ~ key combination
present in the long format input \code{x}.}

\item{negative}{Numeric or Character: Used to fill id ~ key combination
NOT present in the long format input \code{x}.}

\item{xname}{Character: Name of \code{x} to be used in messages.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{data.table} in wide format.
}
\description{
Reshape a long format \code{data.table} using key-value pairs with
\code{data.table::dcast}
}
\examples{
library(data.table)
x <- data.table(
  ID = rep(1:3, each = 2),
  Dx = c("A", "C", "B", "C", "D", "A")
)
dt_keybin_reshape(x, id_name = "ID", key_name = "Dx")
}
\author{
EDG
}


================================================
FILE: man/dt_merge.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_merge}
\alias{dt_merge}
\title{Merge data.tables}
\usage{
dt_merge(
  left,
  right,
  on = NULL,
  left_on = NULL,
  right_on = NULL,
  how = "left",
  left_name = NULL,
  right_name = NULL,
  left_suffix = NULL,
  right_suffix = NULL,
  verbosity = 1L,
  ...
)
}
\arguments{
\item{left}{data.table}

\item{right}{data.table}

\item{on}{Character: Name of column to join on.}

\item{left_on}{Character: Name of column on left table.}

\item{right_on}{Character: Name of column on right table.}

\item{how}{Character: Type of join: "inner", "left", "right", "outer".}

\item{left_name}{Character: Name of left table.}

\item{right_name}{Character: Name of right table.}

\item{left_suffix}{Character: If provided, add this suffix to all left column names,
excluding on/left_on.}

\item{right_suffix}{Character: If provided, add this suffix to all right column names,
excluding on/right_on.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Additional arguments to be passed to \code{data.table::merge}.}
}
\value{
Merged data.table.
}
\description{
Merge data.tables
}
\examples{
library(data.table)
xleft <- data.table(ID = 1:5, Alpha = letters[1:5])
xright <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6])
xlr_inner <- dt_merge(xleft, xright, on = "ID", how = "inner")
}
\author{
EDG
}


================================================
FILE: man/dt_names_by_attr.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_names_by_attr}
\alias{dt_names_by_attr}
\title{List column names by attribute}
\usage{
dt_names_by_attr(x, attribute, exact = TRUE, sorted = TRUE)
}
\arguments{
\item{x}{data.table: Input data.table.}

\item{attribute}{Character: name of attribute.}

\item{exact}{Logical: If TRUE, use exact matching.}

\item{sorted}{Logical: If TRUE, sort the output.}
}
\value{
Character vector.
}
\description{
List column names by attribute
}
\examples{
library(data.table)
x <- data.table(
  id = 1:5,
  sbp = rnorm(5, 120, 15),
  dbp = rnorm(5, 80, 10),
  paO2 = rnorm(5, 90, 10),
  paCO2 = rnorm(5, 40, 5)
)
setattr(x[["id"]], "source", "demographics")
setattr(x[["sbp"]], "source", "outpatient")
setattr(x[["dbp"]], "source", "outpatient")
setattr(x[["paO2"]], "source", "icu")
setattr(x[["paCO2"]], "source", "icu")

dt_names_by_attr(x, "source", "outpatient")
}
\author{
EDG
}


================================================
FILE: man/dt_nunique_perfeat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_nunique_perfeat}
\alias{dt_nunique_perfeat}
\title{Number of unique values per feature}
\usage{
dt_nunique_perfeat(x, excludeNA = FALSE, limit = 20L, verbosity = 1L)
}
\arguments{
\item{x}{data.table: Input data.table.}

\item{excludeNA}{Logical: If TRUE, exclude NA values.}

\item{limit}{Integer: Print up to this many features. Set to -1L to print all.}

\item{verbosity}{Integer: If > 0, print output to console.}
}
\value{
Named integer vector of length \code{NCOL(x)} with number of unique values per column/feature, invisibly.
}
\description{
Number of unique values per feature
}
\examples{
library(data.table)
ir <- as.data.table(iris)
dt_nunique_perfeat(ir)
}
\author{
EDG
}


================================================
FILE: man/dt_pctmatch.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_pctmatch}
\alias{dt_pctmatch}
\title{Get N and percent match of values between two columns of two data.tables}
\usage{
dt_pctmatch(x, y, on = NULL, left_on = NULL, right_on = NULL, verbosity = 1L)
}
\arguments{
\item{x}{data.table: First input data.table.}

\item{y}{data.table: Second input data.table.}

\item{on}{Integer or character: column to read in \code{x} and \code{y}, if it is the
same}

\item{left_on}{Integer or character: column to read in \code{x}}

\item{right_on}{Integer or character: column to read in \code{y}}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
list.
}
\description{
Get N and percent match of values between two columns of two data.tables
}
\examples{
library(data.table)
x <- data.table(ID = 1:5, Alpha = letters[1:5])
y <- data.table(ID = c(3, 4, 5, 6), Beta = LETTERS[3:6])
dt_pctmatch(x, y, on = "ID")
}
\author{
EDG
}


================================================
FILE: man/dt_pctmissing.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_pctmissing}
\alias{dt_pctmissing}
\title{Get percent of missing values from every column}
\usage{
dt_pctmissing(x, verbosity = 1L)
}
\arguments{
\item{x}{data.frame or data.table}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
list
}
\description{
Get percent of missing values from every column
}
\examples{
library(data.table)
x <- data.table(a = c(1, 2, NA, 4), b = c(NA, NA, 3, 4), c = c("A", "B", "C", NA))
dt_pctmissing(x)
}
\author{
EDG
}


================================================
FILE: man/dt_set_autotypes.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_set_autotypes}
\alias{dt_set_autotypes}
\title{Set column types automatically}
\usage{
dt_set_autotypes(x, cols = NULL, verbosity = 1L)
}
\arguments{
\item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}, if needed.}

\item{cols}{Character vector: columns to work on. If not defined, will work on all
columns}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
data.table, invisibly.
}
\description{
This function inspects a data.table and attempts to identify columns that should be
numeric but have been read in as character, and fixes their type \emph{\strong{in-place}}.
This can happen when one or more fields contain non-numeric characters, for example.
}
\examples{
library(data.table)
x <- data.table(
  id = 8001:8006,
  a = c("3", "5", "undefined", "21", "4", NA),
  b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
  c = c(1, 2, 3, 4, 5, 6)
)
str(x)
# ***in-place*** operation means no assignment is needed
dt_set_autotypes(x)
str(x)

# Try excluding column 'a' from autotyping
x <- data.table(
  id = 8001:8006,
  a = c("3", "5", "undefined", "21", "4", NA),
  b = c("mango", "banana", "tangerine", NA, "apple", "kiwi"),
  c = c(1, 2, 3, 4, 5, 6)
)
str(x)
# exclude column 'a' from autotyping
dt_set_autotypes(x, cols = setdiff(names(x), "a"))
str(x)
}
\author{
EDG
}


================================================
FILE: man/dt_set_clean_all.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_set_clean_all}
\alias{dt_set_clean_all}
\title{Clean column names and factor levels \emph{\strong{in-place}}}
\usage{
dt_set_clean_all(x, prefix_digits = NA)
}
\arguments{
\item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}, if needed.}

\item{prefix_digits}{Character: prefix to add to names beginning with a
digit. Set to NA to skip}
}
\value{
Nothing, modifies \code{x} \emph{\strong{in-place}}.
}
\description{
Clean column names and factor levels \emph{\strong{in-place}}
}
\examples{
library(data.table)
x <- as.data.table(iris)
levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris")
names(x)
levels(x[["Species"]])
# ***in-place*** operation means no assignment is needed
dt_set_clean_all(x)
names(x)
levels(x[["Species"]])
}
\author{
EDG
}


================================================
FILE: man/dt_set_cleanfactorlevels.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_set_cleanfactorlevels}
\alias{dt_set_cleanfactorlevels}
\title{Clean factor levels of data.table \emph{\strong{in-place}}}
\usage{
dt_set_cleanfactorlevels(x, prefix_digits = NA)
}
\arguments{
\item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.}

\item{prefix_digits}{Character: If not NA, add this prefix to all factor levels that
are numbers}
}
\value{
Nothing, modifies \code{x} \emph{\strong{in-place}}.
}
\description{
Finds all factors in a data.table and cleans factor levels to include
only underscore symbols
}
\examples{
library(data.table)
x <- as.data.table(iris)
levels(x[["Species"]]) <- c("setosa:iris", "versicolor$iris", "virginica iris")
levels(x[["Species"]])
dt_set_cleanfactorlevels(x)
levels(x[["Species"]])
}
\author{
EDG
}


================================================
FILE: man/dt_set_logical2factor.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.table.R
\name{dt_set_logical2factor}
\alias{dt_set_logical2factor}
\title{Convert data.table logical columns to factors}
\usage{
dt_set_logical2factor(
  x,
  cols = NULL,
  labels = c("False", "True"),
  maintain_attributes = TRUE,
  fillNA = NULL
)
}
\arguments{
\item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.}

\item{cols}{Optional Integer or character: columns to convert. If NULL, operates on all
logical columns.}

\item{labels}{Character: labels for factor levels.}

\item{maintain_attributes}{Logical: If TRUE, maintain column attributes.}

\item{fillNA}{Optional Character: If not NULL, fill NA values with this constant.}
}
\value{
data.table, invisibly.
}
\description{
Convert data.table logical columns to factors with custom labels \emph{\strong{in-place}}
}
\examples{
library(data.table)
x <- data.table(a = 1:5, b = c(TRUE, FALSE, FALSE, FALSE, TRUE))
x
dt_set_logical2factor(x)
x
z <- data.table(
  alpha = 1:5,
  beta = c(TRUE, FALSE, TRUE, NA, TRUE),
  gamma = c(FALSE, FALSE, TRUE, FALSE, NA)
)
# You can usee fillNA to fill NA values with a constant
dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"), fillNA = "No")
z
w <- data.table(mango = 1:5, banana = c(FALSE, FALSE, TRUE, TRUE, FALSE))
w
dt_set_logical2factor(w, cols = 2, labels = c("Ugh", "Huh"))
w
# Column attributes are maintained by default:
z <- data.table(
  alpha = 1:5,
  beta = c(TRUE, FALSE, TRUE, NA, TRUE),
  gamma = c(FALSE, FALSE, TRUE, FALSE, NA)
)
for (i in seq_along(z)) setattr(z[[i]], "source", "Guava")
str(z)
dt_set_logical2factor(z, cols = "beta", labels = c("No", "Yes"))
str(z)
}
\author{
EDG
}


================================================
FILE: man/dt_set_one_hot.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocess.R
\name{dt_set_one_hot}
\alias{dt_set_one_hot}
\title{Convert data.table's factor to one-hot encoding \emph{\strong{in-place}}}
\usage{
dt_set_one_hot(x, xname = NULL, verbosity = 1L)
}
\arguments{
\item{x}{data.table: Input data.table. Will be modified \emph{\strong{in-place}}.}

\item{xname}{Character, optional: Dataset name.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
The input, invisibly, after it has been modified \emph{\strong{in-place}}.
}
\description{
Convert data.table's factor to one-hot encoding \emph{\strong{in-place}}
}
\examples{
ir <- data.table::as.data.table(iris)
# dt_set_one_hot operates ***in-place***; therefore no assignment is used:
dt_set_one_hot(ir)
ir
}
\author{
EDG
}


================================================
FILE: man/exc.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{exc}
\alias{exc}
\title{Exclude columns by character or numeric vector.}
\usage{
exc(x, idx)
}
\arguments{
\item{x}{tabular data.}

\item{idx}{Character or numeric vector: Column names or indices to exclude.}
}
\value{
data.frame, tibble, or data.table.
}
\description{
Exclude columns by character or numeric vector.
}
\examples{
exc(iris, "Species") |> head()
exc(iris, c(1, 3)) |> head()
}
\author{
EDG
}


================================================
FILE: man/feature_matrix.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocess.R
\name{feature_matrix}
\alias{feature_matrix}
\title{Convert tabular data to feature matrix}
\usage{
feature_matrix(x)
}
\arguments{
\item{x}{tabular data: Input data to convert to a feature matrix.}
}
\value{
Matrix with features. Factors are one-hot encoded, if present.
}
\description{
Convert a tabular dataset to a matrix, one-hot encoding factors, if present.
}
\details{
This is a convenience function that uses  \code{\link[=features]{features()}}, \code{\link[=preprocess]{preprocess()}}, \code{as.matrix()}.
}
\examples{
# reorder columns so that we have a categorical feature
x <- set_outcome(iris, "Sepal.Length")
feature_matrix(x) |> head()
}
\author{
EDG
}


================================================
FILE: man/feature_names.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{feature_names}
\alias{feature_names}
\title{Get feature names}
\usage{
feature_names(x)
}
\arguments{
\item{x}{tabular data.}
}
\value{
Character vector of feature names.
}
\description{
Returns all column names except the last one
}
\details{
This applied to tabular datasets used for supervised learning in rtemis,
where, by convention, the last column is the outcome variable and all other columns
are features.
}
\examples{
feature_names(iris)
}
\author{
EDG
}


================================================
FILE: man/features.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{features}
\alias{features}
\title{Get features from tabular data}
\usage{
features(x)
}
\arguments{
\item{x}{tabular data: Input data to get features from.}
}
\value{
Object of the same class as the input, after removing the last column.
}
\description{
Returns all columns except the last one.
}
\details{
This can be applied to tabular datasets used for supervised learning in \pkg{rtemis},
where, by convention, the last column is the outcome variable and all other columns
are features.
}
\examples{
features(iris) |> head()
}
\author{
EDG
}


================================================
FILE: man/get_factor_names.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{get_factor_names}
\alias{get_factor_names}
\title{Get factor names}
\usage{
get_factor_names(x)
}
\arguments{
\item{x}{tabular data.}
}
\value{
Character vector of factor names.
}
\description{
Get factor names
}
\details{
This applied to tabular datasets used for supervised learning in rtemis,
where, by convention, the last column is the outcome variable and all other columns
are features.
}
\examples{
get_factor_names(iris)
}
\author{
EDG
}


================================================
FILE: man/get_mode.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{get_mode}
\alias{get_mode}
\title{Get the mode of a factor or integer}
\usage{
get_mode(x, na.rm = TRUE, getlast = TRUE, retain_class = TRUE)
}
\arguments{
\item{x}{Vector, factor or integer: Input data.}

\item{na.rm}{Logical: If TRUE, exclude NAs (using \code{na.exclude(x)}).}

\item{getlast}{Logical: If TRUE, get the last value in case of ties.}

\item{retain_class}{Logical: If TRUE, output is always same class as input.}
}
\value{
The mode of \code{x}
}
\description{
Returns the mode of a factor or integer
}
\examples{
x <- c(9, 3, 4, 4, 0, 2, 2, NA)
get_mode(x)
x <- c(9, 3, 2, 2, 0, 4, 4, NA)
get_mode(x)
get_mode(x, getlast = FALSE)
}
\author{
EDG
}


================================================
FILE: man/get_msg_sink.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/msg.R
\name{get_msg_sink}
\alias{get_msg_sink}
\title{Get the current rtemis message sink}
\usage{
get_msg_sink()
}
\value{
The currently registered sink function, or \code{NULL} if none is set.
}
\description{
Get the current rtemis message sink
}
\seealso{
\code{\link[=set_msg_sink]{set_msg_sink()}}, \code{\link[=with_msg_sink]{with_msg_sink()}}.
}
\author{
EDG
}


================================================
FILE: man/get_palette.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_palettes.R
\name{get_palette}
\alias{get_palette}
\title{Get Color Palette}
\usage{
get_palette(palette = NULL, verbosity = 1L)
}
\arguments{
\item{palette}{Character: Name of palette to return. Default = NULL: available palette
names are printed and no palette is returned.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
Character vector of colors for the specified palette, or invisibly returns
list of available palettes if \code{palette = NULL}.
}
\description{
\code{get_palette()} returns a color palette (character vector of colors).
Without arguments, prints names of available color palettes.
Each palette is a named list of hexadecimal color definitions which can be used with
any graphics function.
}
\examples{
# Print available palettes
get_palette()
# Get the Imperial palette
get_palette("imperial")
}
\author{
EDG
}


================================================
FILE: man/getnames.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_df.R
\name{getnames}
\alias{getnames}
\alias{getfactornames}
\alias{getnumericnames}
\alias{getlogicalnames}
\alias{getcharacternames}
\alias{getdatenames}
\title{Get names by string matching or class}
\usage{
getnames(
  x,
  pattern = NULL,
  starts_with = NULL,
  ends_with = NULL,
  ignore_case = TRUE
)

getfactornames(x)

getnumericnames(x)

getlogicalnames(x)

getcharacternames(x)

getdatenames(x)
}
\arguments{
\item{x}{object with \code{names()} method.}

\item{pattern}{Character: pattern to match anywhere in names of x.}

\item{starts_with}{Character: pattern to match in the beginning of names of x.}

\item{ends_with}{Character: pattern to match at the end of names of x.}

\item{ignore_case}{Logical: If TRUE, well, ignore case.}
}
\value{
Character vector of matched names.
}
\description{
Get names by string matching or class
}
\details{
For \code{getnames()} only:
\code{pattern}, \code{starts_with}, and \code{ends_with} are applied sequentially.
If more than one is provided, the result will be the intersection of all matches.
}
\examples{
getnames(iris, starts_with = "Sepal")
getnames(iris, ends_with = "Width")
getfactornames(iris)
getnumericnames(iris)
}
\author{
EDG
}


================================================
FILE: man/getnamesandtypes.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_df.R
\name{getnamesandtypes}
\alias{getnamesandtypes}
\title{Get data.frame names and types}
\usage{
getnamesandtypes(x)
}
\arguments{
\item{x}{data.frame / data.table or similar}
}
\value{
character vector of column names with attribute "type" holding the class of each
column
}
\description{
Get data.frame names and types
}
\examples{
getnamesandtypes(iris)
}
\author{
EDG
}


================================================
FILE: man/grapes-BC-grapes.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocess.R
\name{\%BC\%}
\alias{\%BC\%}
\title{Binary matrix times character vector}
\usage{
x \%BC\% labels
}
\arguments{
\item{x}{A binary matrix or data.frame}

\item{labels}{Character vector length equal to \code{ncol(x)}}
}
\value{
a character vector
}
\description{
Binary matrix times character vector
}
\author{
EDG
}


================================================
FILE: man/inc.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{inc}
\alias{inc}
\title{Select (include) columns by character or numeric vector.}
\usage{
inc(x, idx)
}
\arguments{
\item{x}{tabular data.}

\item{idx}{Character or numeric vector: Column names or indices to include.}
}
\value{
data.frame, tibble, or data.table.
}
\description{
Select (include) columns by character or numeric vector.
}
\examples{
inc(iris, c(3, 4)) |> head()
inc(iris, c("Sepal.Length", "Species")) |> head()
}
\author{
EDG
}


================================================
FILE: man/index_col_by_attr.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.R
\name{index_col_by_attr}
\alias{index_col_by_attr}
\title{Index columns by attribute name & value}
\usage{
index_col_by_attr(x, name, value, exact = TRUE)
}
\arguments{
\item{x}{tabular data.}

\item{name}{Character: Name of attribute.}

\item{value}{Character: Value of attribute.}

\item{exact}{Logical: Passed to \code{attr} when retrieving attribute value. If \code{TRUE}, attribute
name must match \code{name} exactly, otherwise, partial match is allowed.}
}
\value{
Integer vector.
}
\description{
Index columns by attribute name & value
}
\examples{
library(data.table)
x <- data.table(
  id = 1:5,
  sbp = rnorm(5, 120, 15),
  dbp = rnorm(5, 80, 10),
  paO2 = rnorm(5, 90, 10),
  paCO2 = rnorm(5, 40, 5)
)
setattr(x[["sbp"]], "source", "outpatient")
setattr(x[["dbp"]], "source", "outpatient")
setattr(x[["paO2"]], "source", "icu")
setattr(x[["paCO2"]], "source", "icu")
index_col_by_attr(x, "source", "icu")
}
\author{
EDG
}


================================================
FILE: man/init_project_dir.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{init_project_dir}
\alias{init_project_dir}
\title{Initialize Project Directory}
\usage{
init_project_dir(path, output_dir = "Out", verbosity = 1L)
}
\arguments{
\item{path}{Character: Path to initialize project directory in.}

\item{output_dir}{Character: Name of output directory to create.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
Character: the path where the project directory was initialized, invisibly.
}
\description{
Initializes Directory Structure: "R", "Data", "Results"
}
\examples{
\dontrun{
# Will create "my_project" directory with
init_project_dir("my_project")
}
}
\author{
EDG
}


================================================
FILE: man/inspect.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{inspect}
\alias{inspect}
\title{Inspect rtemis object}
\usage{
inspect(x)
}
\arguments{
\item{x}{R object to inspect.}
}
\value{
Called for side effect of printing information to console; returns character string
invisibly.
}
\description{
Inspect rtemis object
}
\examples{
inspect(iris)
}
\author{
EDG
}


================================================
FILE: man/inspect_type.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.R
\name{inspect_type}
\alias{inspect_type}
\title{Inspect character and factor vector}
\usage{
inspect_type(x, xname = NULL, verbosity = 1L, thresh = 0.5, na.omit = TRUE)
}
\arguments{
\item{x}{Character or factor vector.}

\item{xname}{Character: Name of input vector \code{x}.}

\item{verbosity}{Integer: Verbosity level.}

\item{thresh}{Numeric: Threshold for determining whether to convert to numeric.}

\item{na.omit}{Logical: If TRUE, remove NA values before checking.}
}
\value{
Character.
}
\description{
Checks character or factor vector to determine whether it might be best to convert to
numeric.
}
\details{
All data can be represented as a character string. A numeric variable may be read as
a character variable if there are non-numeric characters in the data.
It is important to be able to automatically detect such variables and convert them,
which would mean introducing NA values.
}
\examples{
x <- c("3", "5", "undefined", "21", "4", NA)
inspect_type(x)
z <- c("mango", "banana", "tangerine", NA)
inspect_type(z)
}
\author{
EDG
}


================================================
FILE: man/is_constant.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{is_constant}
\alias{is_constant}
\title{Check if vector is constant}
\usage{
is_constant(x, skip_missing = FALSE)
}
\arguments{
\item{x}{Vector: Input}

\item{skip_missing}{Logical: If TRUE, skip NA values before test}
}
\value{
Logical.
}
\description{
Check if vector is constant
}
\examples{
x <- rep(9, 1000000)
is_constant(x)
x[10] <- NA
is_constant(x)
is_constant(x, skip_missing = TRUE)
}
\author{
EDG
}


================================================
FILE: man/labelify.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_strings.R
\name{labelify}
\alias{labelify}
\title{Format text for label printing}
\usage{
labelify(
  x,
  underscores_to_spaces = TRUE,
  dotsToSpaces = TRUE,
  toLower = FALSE,
  toTitleCase = TRUE,
  capitalize_strings = c("id"),
  stringsToSpaces = c("\\\\$", "`")
)
}
\arguments{
\item{x}{Character: Input}

\item{underscores_to_spaces}{Logical: If TRUE, convert underscores to spaces.}

\item{dotsToSpaces}{Logical: If TRUE, convert dots to spaces.}

\item{toLower}{Logical: If TRUE, convert to lowercase (precedes \code{toTitleCase}).
Default = FALSE (Good for getting all-caps words converted to title case, bad for abbreviations
you want to keep all-caps)}

\item{toTitleCase}{Logical: If TRUE, convert to Title Case. Default = TRUE (This does not change
all-caps words, set \code{toLower} to TRUE if desired)}

\item{capitalize_strings}{Character, vector: Always capitalize these strings, if present. Default = \code{"id"}}

\item{stringsToSpaces}{Character, vector: Replace these strings with spaces. Escape as needed for \code{gsub}.
Default = \code{"\\\\$"}, which formats common input of the type \code{data.frame$variable}}
}
\value{
Character vector.
}
\description{
Format text for label printing
}
\examples{
x <- c("county_name", "total.cost$", "age", "weight.kg")
labelify(x)
}
\author{
EDG
}


================================================
FILE: man/massGLM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/massGLM.R
\name{massGLM}
\alias{massGLM}
\title{Mass-univariate GLM Analysis}
\usage{
massGLM(x, y, scale_y = NULL, center_y = NULL, verbosity = 1L)
}
\arguments{
\item{x}{tabular data: Predictor variables. Usually a small number of covariates.}

\item{y}{data.frame or similar: Each column is a different outcome. The function will train one
GLM for each column of \code{y}. Usually a large number of features.}

\item{scale_y}{Logical: If TRUE, scale each column of \code{y} to have mean 0 and sd 1. If \code{NULL},
defaults to TRUE if \code{y} is numeric, FALSE otherwise.}

\item{center_y}{Logical: If TRUE, center each column of \code{y} to have mean 0. If \code{NULL}, defaults
to TRUE if \code{scale_y} is TRUE, FALSE otherwise.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{MassGLM} object.
}
\description{
Mass-univariate GLM Analysis
}
\examples{
set.seed(2022)
y <- rnormmat(500, 40, return_df = TRUE)
x <- data.frame(
  x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500),
  x2 = y[[21]] + rnorm(500)
)
massmod <- massGLM(x, y)
# Print table of coefficients, p-values, etc. for all models
summary(massmod)
}
\author{
EDG
}


================================================
FILE: man/matchcases.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.R
\name{matchcases}
\alias{matchcases}
\title{Match cases by covariates}
\usage{
matchcases(
  target,
  pool,
  n_matches = 1,
  target_id = NULL,
  pool_id = NULL,
  exactmatch_factors = TRUE,
  exactmatch_cols = NULL,
  distmatch_cols = NULL,
  norepeats = TRUE,
  ignore_na = FALSE,
  verbosity = 1L
)
}
\arguments{
\item{target}{data.frame you are matching against.}

\item{pool}{data.frame you are looking for matches from.}

\item{n_matches}{Integer: Number of matches to return.}

\item{target_id}{Character: Column name in \code{target} that holds unique
cases IDs. Default = NULL, in which case integer case numbers will be used.}

\item{pool_id}{Character: Same as \code{target_id} for \code{pool}.}

\item{exactmatch_factors}{Logical: If TRUE, selected cases will have to
exactly match factors available in \code{target}.}

\item{exactmatch_cols}{Character: Names of columns that should be matched
exactly.}

\item{distmatch_cols}{Character: Names of columns that should be
distance-matched.}

\item{norepeats}{Logical: If TRUE, cases in \code{pool} can only be chosen
once.}

\item{ignore_na}{Logical: If TRUE, ignore NA values during exact matching.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
data.frame
}
\description{
Find one or more cases from a \code{pool} data.frame that match cases in a target
data.frame. Match exactly and/or by distance (sum of squared distances).
}
\examples{
set.seed(2021)
cases <- data.frame(
  PID = paste0("PID", seq(4)),
  Sex = factor(c(1, 1, 0, 0)),
  Handedness = factor(c(1, 1, 0, 1)),
  Age = c(21, 27, 39, 24),
  Var = c(.7, .8, .9, .6),
  Varx = rnorm(4)
)
controls <- data.frame(
  CID = paste0("CID", seq(50)),
  Sex = factor(sample(c(0, 1), 50, TRUE)),
  Handedness = factor(sample(c(0, 1), 50, TRUE, c(.1, .9))),
  Age = sample(16:42, 50, TRUE),
  Var = rnorm(50),
  Vary = rnorm(50)
)

mc <- matchcases(cases, controls, 2, "PID", "CID")
}
\author{
EDG
}


================================================
FILE: man/mgetnames.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_df.R
\name{mgetnames}
\alias{mgetnames}
\title{Get names by string matching multiple patterns}
\usage{
mgetnames(
  x,
  pattern = NULL,
  starts_with = NULL,
  ends_with = NULL,
  ignore_case = TRUE,
  return_index = FALSE
)
}
\arguments{
\item{x}{Character vector or object with \code{names()} method.}

\item{pattern}{Character vector: pattern(s) to match anywhere in names of x.}

\item{starts_with}{Character: pattern to match in the beginning of names of x.}

\item{ends_with}{Character: pattern to match at the end of names of x.}

\item{ignore_case}{Logical: If TRUE, well, ignore case.}

\item{return_index}{Logical: If TRUE, return integer index of matches instead of names.}
}
\value{
Character vector of matched names or integer index.
}
\description{
Get names by string matching multiple patterns
}
\details{
\code{pattern}, \code{starts_with}, and \code{ends_with} are applied and the union of all matches is returned.
\code{pattern} can be a character vector of multiple patterns to match.
}
\examples{
mgetnames(iris, pattern = c("Sepal", "Petal"))
mgetnames(iris, starts_with = "Sepal")
mgetnames(iris, ends_with = "Width")
}
\author{
EDG
}


================================================
FILE: man/names_by_class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.R
\name{names_by_class}
\alias{names_by_class}
\title{List column names by class}
\usage{
names_by_class(x, sorted = TRUE, item_format = highlight, maxlength = 24)
}
\arguments{
\item{x}{tabular data.}

\item{sorted}{Logical: If TRUE, sort the output}

\item{item_format}{Function: Function to format each item}

\item{maxlength}{Integer: Maximum number of items to print}
}
\value{
\code{NULL}, invisibly.
}
\description{
List column names by class
}
\examples{
names_by_class(iris)
}
\author{
EDG
}


================================================
FILE: man/one_hot2factor.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/preprocess.R
\name{one_hot2factor}
\alias{one_hot2factor}
\title{Convert one-hot encoded matrix to factor}
\usage{
one_hot2factor(x, labels = colnames(x))
}
\arguments{
\item{x}{one-hot encoded matrix or data.frame.}

\item{labels}{Character vector of level names.}
}
\value{
A factor.
}
\description{
Convert one-hot encoded matrix to factor
}
\details{
If input has a single column, it will be converted to factor and
returned
}
\examples{
x <- data.frame(matrix(FALSE, 10, 3))
colnames(x) <- c("Dx1", "Dx2", "Dx3")
x$Dx1[1:3] <- x$Dx2[4:6] <- x$Dx3[7:10] <- TRUE
one_hot2factor(x)
}
\author{
EDG
}


================================================
FILE: man/outcome.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{outcome}
\alias{outcome}
\title{Get the outcome as a vector}
\usage{
outcome(x)
}
\arguments{
\item{x}{tabular data.}
}
\value{
Vector containing the last column of \code{x}.
}
\description{
Returns the last column of \code{x}, which is by convention the outcome variable.
}
\details{
This applied to tabular datasets used for supervised learning in rtemis,
where, by convention, the last column is the outcome variable and all other columns
are features.
}
\examples{
outcome(iris)
}
\author{
EDG
}


================================================
FILE: man/outcome_name.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{outcome_name}
\alias{outcome_name}
\title{Get the name of the last column}
\usage{
outcome_name(x)
}
\arguments{
\item{x}{tabular data.}
}
\value{
Name of the last column.
}
\description{
Get the name of the last column
}
\details{
This applied to tabular datasets used for supervised learning in rtemis,
where, by convention, the last column is the outcome variable and all other columns
are features.
}
\examples{
outcome_name(iris)
}
\author{
EDG
}


================================================
FILE: man/plot.MassGLM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/08_MassUni.R
\name{plot.MassGLM}
\alias{plot.MassGLM}
\title{Plot MassGLM using volcano plot}
\usage{
\method{plot}{MassGLM}(
  x,
  coefname = NULL,
  p_adjust_method = "holm",
  p_transform = function(x) -log10(x),
  xlab = "Coefficient",
  ylab = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  verbosity = 1L,
  ...
)
}
\arguments{
\item{x}{MassGLM object trained using \link{massGLM}.}

\item{coefname}{Character: Name of coefficient to plot. If \code{NULL}, the first coefficient is used.}

\item{p_adjust_method}{Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" -
p-value adjustment method.}

\item{p_transform}{Function to transform p-values for plotting. Default is \code{function(x) -log10(x)}.}

\item{xlab}{Character: x-axis label.}

\item{ylab}{Character: y-axis label.}

\item{theme}{\code{Theme} object. Create using one of the \code{theme_} functions, e.g.
\code{theme_whitegrid()}.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Additional arguments passed to \link{draw_volcano}.}
}
\value{
plotly object with volcano plot.
}
\description{
Plot MassGLM using volcano plot
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
set.seed(2019)
y <- rnormmat(500, 500, return_df = TRUE)
x <- data.frame(x = y[, 3] + y[, 5] - y[, 9] + y[, 15] + rnorm(500))
mod <- massGLM(x, y)
plot(mod)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/plot_manhattan.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R, R/08_MassUni.R
\name{plot_manhattan}
\alias{plot_manhattan}
\alias{plot_manhattan.MassGLM}
\title{Manhattan plot}
\usage{
plot_manhattan(x, ...)

plot_manhattan.MassGLM(
  x,
  coefname = NULL,
  p_adjust_method = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr",
    "none"),
  p_transform = function(x) -log10(x),
  ylab = NULL,
  theme = choose_theme(getOption("rtemis_theme")),
  col_pos = "#43A4AC",
  col_neg = "#FA9860",
  alpha = 0.8,
  ...
)
}
\arguments{
\item{x}{MassGLM object.}

\item{...}{Additional arguments passed to \link{draw_bar}.}

\item{coefname}{Character: Name of coefficient to plot. If \code{NULL}, the first coefficient is used.}

\item{p_adjust_method}{Character: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" -
p-value adjustment method.}

\item{p_transform}{Function to transform p-values for plotting. Default is \code{function(x) -log10(x)}.}

\item{ylab}{Character: y-axis label.}

\item{theme}{\code{Theme} object.}

\item{col_pos}{Character: Color for positive significant coefficients.}

\item{col_neg}{Character: Color for negative significant coefficients.}

\item{alpha}{Numeric: Transparency level for the bars.}
}
\value{
plotly object.
}
\description{
Draw a Manhattan plot for \code{MassGLM} objects created with \link{massGLM}.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
# x: outcome of interest as first column, optional covariates in the other columns
# y: features whose association with x we want to study
set.seed(2022)
y <- data.table(rnormmat(500, 40))
x <- data.table(
  x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500),
  x2 = y[[21]] + rnorm(500)
)
massmod <- massGLM(x, y)
plot_manhattan(massmod)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/plot_roc.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{plot_roc}
\alias{plot_roc}
\title{Plot ROC curve}
\usage{
plot_roc(x, ...)
}
\arguments{
\item{x}{\code{Classification} or \code{ClassificationRes} object.}

\item{...}{Additional arguments passed to the plotting function.}
}
\value{
A plotly object containing the ROC curve.
}
\description{
This generic is used to plot the ROC curve for a model.
}
\examples{
ir <- iris[51:150, ]
ir[["Species"]] <- factor(ir[["Species"]])
species_glm <- train(ir, algorithm = "GLM")
plot_roc(species_glm)
}
\author{
EDG
}


================================================
FILE: man/plot_true_pred.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{plot_true_pred}
\alias{plot_true_pred}
\title{Plot True vs. Predicted Values}
\usage{
plot_true_pred(x, ...)
}
\arguments{
\item{x}{\code{Supervised} or \code{SupervisedRes} object.}

\item{...}{Additional arguments passed to methods.}
}
\value{
plotly object.
}
\description{
Plot True vs. Predicted Values for Supervised objects.
For classification, it plots a confusion matrix.
For regression, it plots a scatter plot of true vs. predicted values.
}
\examples{
x <- set_outcome(iris, "Sepal.Length")
sepallength_glm <- train(x, algorithm = "GLM")
plot_true_pred(sepallength_glm)
}
\author{
EDG
}


================================================
FILE: man/plot_varimp.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{plot_varimp}
\alias{plot_varimp}
\title{Plot Variable Importance}
\usage{
plot_varimp(x, ...)
}
\arguments{
\item{x}{\code{Supervised} or \code{SupervisedRes} object.}

\item{...}{Additional arguments passed to methods.}
}
\value{
plotly object or invisible NULL if no variable importance is available.
}
\description{
Plot Variable Importance for Supervised objects.
}
\details{
This method calls \link{draw_varimp} internally.
If you pass an integer to the \code{plot_top} argument, the method will plot this many top features.
If you pass a number between 0 and 1 to the \code{plot_top} argument, the method will plot this
fraction of top features.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
ir <- set_outcome(iris, "Sepal.Length")
seplen_cart <- train(ir, algorithm = "CART")
plot_varimp(seplen_cart)
# Plot horizontally
plot_varimp(seplen_cart, orientation = "h")
plot_varimp(seplen_cart, orientation = "h", plot_top = 3L)
plot_varimp(seplen_cart, orientation = "h", plot_top = 0.5)
\dontshow{\}) # examplesIf}
}
\seealso{
\link{draw_varimp}, which is called by this method
}
\author{
EDG
}


================================================
FILE: man/preprocess.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R, R/preprocess.R
\name{preprocess}
\alias{preprocess}
\alias{preprocess.class_tabular.PreprocessorConfig}
\alias{preprocess.class_tabular.Preprocessor}
\title{Preprocess Data}
\usage{
preprocess(x, config, ...)

preprocess.class_tabular.PreprocessorConfig(
  x,
  config,
  dat_validation = NULL,
  dat_test = NULL,
  verbosity = 1L
)

preprocess.class_tabular.Preprocessor(x, config, verbosity = 1L)
}
\arguments{
\item{x}{data.frame, data.table, tbl_df (tabular data): Data to be preprocessed.}

\item{config}{\code{PreprocessorConfig}: Setup using \link{setup_Preprocessor} OR \code{Preprocessor} object:
Output of previous run of \code{preprocess}. This allows, for example, applying preprocessing to a
validation or test set using the same parameters as were used for the training set. In
particular, the same scale centers and coefficients will be applied to the new data.}

\item{...}{Not used.}

\item{dat_validation}{tabular data: Validation set data.}

\item{dat_test}{tabular data: Test set data.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{Preprocessor} object.
}
\description{
Preprocess data for analysis and visualization.
}
\details{
Methods are provided for preprocessing training set data, which accepts a \code{PreprocessorConfig}
object, and for preprocessing validation and test set data, which accept a \code{Preprocessor}
object.
}
\examples{
# Setup a `Preprocessor`: this outputs a `PreprocessorConfig` object.
prp <- setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE)

# Includes a long list of parameters
prp

# Resample iris to get train and test data
res <- resample(iris, setup_Resampler(seed = 2026))
iris_train <- iris[res[[1]], ]
iris_test <- iris[-res[[1]], ]

# Preprocess training data
iris_pre <- preprocess(iris_train, prp)

# Access preprocessd training data with `preprocessed()`
preprocessed(iris_pre)

# Apply the same preprocessing to test data
# In this case, the scale and center values from training data will be used.
# Note how `preprocess()` accepts either a `PreprocessorConfig` or `Preprocessor` object for
# this reason.
iris_test_pre <- preprocess(iris_test, iris_pre)

# Access preprocessed test data
preprocessed(iris_test_pre)
}
\author{
EDG
}


================================================
FILE: man/preprocessed.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{preprocessed}
\alias{preprocessed}
\title{Get preprocessed data from \code{Preprocessor}.}
\usage{
preprocessed(x)
}
\arguments{
\item{x}{\code{Preprocessor}: A \code{Preprocessor} object.}
}
\value{
data.frame: The preprocessed data.
}
\description{
Returns the preprocessed data from a \code{Preprocessor} object.
}
\examples{
prp <- preprocess(iris, setup_Preprocessor(scale = TRUE, center = TRUE))
preprocessed(prp)
}


================================================
FILE: man/present.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{present}
\alias{present}
\title{Present rtemis object}
\usage{
present(x, ...)
}
\arguments{
\item{x}{\code{Supervised} or \code{SupervisedRes} object or list of such objects.}

\item{...}{Additional arguments passed to the plotting function.}
}
\value{
A plotly object.
}
\description{
This generic is used to present an rtemis object by printing to console and drawing plots.
}
\examples{
\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
ir <- set_outcome(iris, "Sepal.Length")
seplen_lightrf <- train(ir, algorithm = "lightrf")
present(seplen_lightrf)
\dontshow{\}) # examplesIf}
}
\author{
EDG
}


================================================
FILE: man/previewcolor.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_color.R
\name{previewcolor}
\alias{previewcolor}
\title{Preview color}
\usage{
previewcolor(
  x,
  main = NULL,
  bg = "#333333",
  main_col = "#b3b3b3",
  main_x = 0.7,
  main_y = 0.2,
  main_adj = 0,
  main_cex = 0.9,
  main_font = 2,
  width = NULL,
  xlim = NULL,
  ylim = c(0, 2.2),
  asp = 1,
  labels_y = 1.55,
  label_cex = NULL,
  mar = c(0, 0, 0, 1),
  filename = NULL,
  pdf_width = 8,
  pdf_height = 2.5
)
}
\arguments{
\item{x}{Color, vector: One or more colors that R understands}

\item{main}{Character: Title. Default = NULL, which results in
\code{deparse(substitute(x))}}

\item{bg}{Background color.}

\item{main_col}{Color: Title color}

\item{main_x}{Float: x coordinate for \code{main}.}

\item{main_y}{Float: y coordinate for \code{main}.}

\item{main_adj}{Float: \code{adj} argument to mtext for \code{main}.}

\item{main_cex}{Float: character expansion factor for \code{main}.}

\item{main_font}{Integer, 1 or 2: Weight of \code{main} 1: regular, 2: bold.}

\item{width}{Float: Plot width. Default = NULL, i.e. set automatically}

\item{xlim}{Vector, length 2: x-axis limits. Default = NULL, i.e. set automatically}

\item{ylim}{Vector, length 2: y-axis limits.}

\item{asp}{Float: Plot aspect ratio.}

\item{labels_y}{Float: y coord for labels. Default = 1.55 (rhombi are fixed and range y .5 - 1.5)}

\item{label_cex}{Float: Character expansion for labels. Default = NULL, and is
calculated automatically based on length of \code{x}}

\item{mar}{Numeric vector, length 4: margin size.}

\item{filename}{Character: Path to save plot as PDF.}

\item{pdf_width}{Numeric: Width of PDF in inches.}

\item{pdf_height}{Numeric: Height of PDF in inches.}
}
\value{
Nothing, prints plot.
}
\description{
Preview one or multiple colors using little rhombi with their little labels up top
}
\examples{
previewcolor(get_palette("rtms"))
}
\author{
EDG
}


================================================
FILE: man/read.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read.R
\name{read}
\alias{read}
\title{Read tabular data from a variety of formats}
\usage{
read(
  filename,
  datadir = NULL,
  make_unique = FALSE,
  character2factor = FALSE,
  clean_colnames = TRUE,
  delim_reader = c("data.table", "vroom", "duckdb", "arrow"),
  xlsx_sheet = 1,
  sep = NULL,
  quote = "\\"",
  na_strings = c(""),
  output = c("data.table", "tibble", "data.frame"),
  attr = NULL,
  value = NULL,
  verbosity = 1L,
  fread_verbosity = 0L,
  timed = verbosity > 0L,
  ...
)
}
\arguments{
\item{filename}{Character: filename or full path if \code{datadir = NULL}.}

\item{datadir}{Character: Optional path to directory where \code{filename}
is located. If not specified, \code{filename} must be the full path.}

\item{make_unique}{Logical: If TRUE, keep unique rows only.}

\item{character2factor}{Logical: If TRUE, convert character variables to
factors.}

\item{clean_colnames}{Logical: If TRUE, clean columns names using
\link{clean_colnames}.}

\item{delim_reader}{Character: package to use for reading delimited data.}

\item{xlsx_sheet}{Integer or character: Name or number of XLSX sheet to read.}

\item{sep}{Single character: field separator. If \code{delim_reader = "fread"}
and \code{sep = NULL}, this defaults to "auto", otherwise defaults to ",".}

\item{quote}{Single character: quote character.}

\item{na_strings}{Character vector: Strings to be interpreted as NA values.
For \code{delim_reader = "duckdb"}, this must be a single string.}

\item{output}{Character: "default" or "data.table", If default, return the delim_reader's
default data structure, otherwise convert to data.table.}

\item{attr}{Character: Attribute to set (Optional).}

\item{value}{Character: Value to set (if \code{attr} is not NULL).}

\item{verbosity}{Integer: Verbosity level.}

\item{fread_verbosity}{Integer: Verbosity level. Passed to \code{data.table::fread}}

\item{timed}{Logical: If TRUE, time the process and print to console}

\item{...}{Additional arguments to pass to \code{data.table::fread},
\code{arrow::read_delim_arrow()}, \code{vroom::vroom()},
or \code{readxl::read_excel()}.}
}
\value{
data.frame, data.table, or tibble.
}
\description{
Read data and optionally clean column names, keep unique rows, and convert
characters to factors
}
\details{
\code{read} is a convenience function to read:
\itemize{
\item \strong{Delimited} files using \code{data.table:fread()}, \code{arrow:read_delim_arrow()},
\code{vroom::vroom()}, or \code{duckdb::duckdb_read_csv()}
\item \strong{ARFF} files using \code{farff::readARFF()}
\item \strong{Parquet} files using \code{arrow::read_parquet()}
\item \strong{XLSX} files using \code{readxl::read_excel()}
\item \strong{DTA} files from Stata using \code{haven::read_dta()}
\item \strong{FASTA} files using \code{seqinr::read.fasta()}
\item \strong{RDS} files using \code{readRDS()}
}
}
\examples{
\dontrun{
# Replace with your own data directory and filename
datadir <- "/Data"
dat <- read("iris.csv", datadir)
}
}
\author{
EDG
}


================================================
FILE: man/read_config.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/14_SuperConfig.R
\name{read_config}
\alias{read_config}
\title{Read \code{SuperConfig} from TOML file}
\usage{
read_config(file)
}
\arguments{
\item{file}{Character: Path to input TOML file.}
}
\value{
\code{SuperConfig} object.
}
\description{
Read \code{SuperConfig} object from TOML file that was written with \code{write_toml()}.
}
\examples{
# Create a SuperConfig object
x <- setup_SuperConfig(
  dat_training_path = "~/Data/iris.csv",
  algorithm = "LightRF",
  hyperparameters = setup_LightRF()
)
# Write TOML file
tmpdir <- tempdir()
tmpfile <- file.path(tmpdir, "rtemis_test.toml")
write_toml(x, tmpfile)
# Read config from TOML file
x_read <- read_config(tmpfile)
}
\author{
EDG
}


================================================
FILE: man/regression_metrics.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/metrics.R
\name{regression_metrics}
\alias{regression_metrics}
\title{Regression Metrics}
\usage{
regression_metrics(true, predicted, na.rm = TRUE, sample = NULL)
}
\arguments{
\item{true}{Numeric vector: True values.}

\item{predicted}{Numeric vector: Predicted values.}

\item{na.rm}{Logical: If TRUE, remove NA values before computation.}

\item{sample}{Character: Sample name (e.g. "training", "test").}
}
\value{
\code{RegressionMetrics} object.
}
\description{
Regression Metrics
}
\examples{
true <- rnorm(100)
predicted <- true + rnorm(100, sd = 0.5)
regression_metrics(true, predicted)
}
\author{
EDG
}


================================================
FILE: man/resample.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/resample.R
\name{resample}
\alias{resample}
\title{Resample data}
\usage{
resample(x, config = setup_Resampler(), verbosity = 1L)
}
\arguments{
\item{x}{Vector or data.frame: Usually the outcome; \code{NROW(x)} defines the sample size.}

\item{config}{Resampler object created by \link{setup_Resampler}.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{Resampler} object.
}
\description{
Create resamples of your data, e.g. for model building or validation.
"KFold" creates stratified folds, , "StratSub" creates stratified subsamples,
"Bootstrap" gives the standard bootstrap, i.e. random sampling with replacement,
while "StratBoot" uses StratSub and then randomly duplicates some of the training cases to
reach original length of input (default) or length defined by \code{target_length}.
}
\details{
Note that option 'KFold' may result in resamples of slightly different length. Avoid all
operations which rely on equal-length vectors. For example, you can't place resamples in a
data.frame, but must use a list instead.
}
\examples{
y <- rnorm(200)
# 10-fold (stratified)
y_10fold <- resample(y, setup_Resampler(10L, "kfold"))
y_10fold
# 25 stratified subsamples
y_25strat <- resample(y, setup_Resampler(25L, "stratsub"))
y_25strat
# 100 stratified bootstraps
y_100strat <- resample(y, setup_Resampler(100L, "stratboot"))
y_100strat
# LOOCV
y_loocv <- resample(y, setup_Resampler(type = "LOOCV"))
y_loocv
}
\author{
EDG
}


================================================
FILE: man/rnormmat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{rnormmat}
\alias{rnormmat}
\title{Random Normal Matrix}
\usage{
rnormmat(
  nrow = 10,
  ncol = 10,
  mean = 0,
  sd = 1,
  return_df = FALSE,
  seed = NULL
)
}
\arguments{
\item{nrow}{Integer: Number of rows.}

\item{ncol}{Integer: Number of columns.}

\item{mean}{Float: Mean.}

\item{sd}{Float: Standard deviation.}

\item{return_df}{Logical: If TRUE, return data.frame, otherwise matrix.}

\item{seed}{Integer: Set seed for \code{rnorm}.}
}
\value{
\code{matrix} or \code{data.frame}.
}
\description{
Create a matrix or data frame of defined dimensions, whose columns are random normal vectors
}
\examples{
x <- rnormmat(20, 5, mean = 12, sd = 6, return_df = TRUE, seed = 2026)
x
}
\author{
EDG
}


================================================
FILE: man/rtemis-package.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rtemis-package.R
\docType{package}
\name{rtemis-package}
\alias{rtemis}
\alias{rtemis-package}
\title{\pkg{rtemis}: Advanced Machine Learning and Visualization}
\description{
Advanced Machine Learning & Visualization made efficient, accessible, reproducible
}
\section{Online Documentation and Vignettes}{

\url{https://docs.rtemis.org/r/ml}
}

\section{System Setup}{

There are some options you can define in your .Rprofile (usually found in your home directory),
so you do not have to define each time you execute a function.
\describe{
\item{rtemis_theme}{General plotting theme; set to e.g. "whiteigrid" or "darkgraygrid"}
\item{rtemis_font}{Font family to use in plots.}
\item{rtemis_palette}{Name of default palette to use in plots. See options by running \code{get_palette()}}
}
}

\section{Visualization}{

Graphics are handled using the \code{draw} family, which produces interactive plots primarily using
\code{plotly} and other packages.
}

\section{Supervised Learning}{

By convention, the last column of the data is the outcome variable, and all other columns are
predictors. Convenience function \link{set_outcome} can be used to move a specified column to the
end of the data.
Regression and Classification is performed using \code{train()}.
This function allows you to preprocess, train, tune, and test models on multiple resamples.
Use \link{available_supervised} to get a list of available algorithms
}

\section{Classification}{

For training of binary classification models, the outcome should be provided as a factor,
with the \emph{second} level of the factor being the 'positive' class.
}

\section{Clustering}{

Clustering is performed using \code{cluster()}.
Use \link{available_clustering} to get a list of available algorithms.
}

\section{Decomposition}{

Decomposition is performed using \code{decomp()}.
Use \link{available_decomposition} to get a list of available algorithms.
}

\section{Type Documentation}{

Function documentation includes input type (e.g. "Character", "Integer",
"Float"/"Numeric", etc).
When applicable, value ranges are provided in interval notation. For example, Float: [0, 1)
means floats between 0 and 1 including 0, but excluding 1.
Categorical variables may include set of allowed values using curly braces.
For example, Character: \{"future", "mirai", "none"\}.
}

\section{Tabular Data}{

\pkg{rtemis} internally uses methods for efficient handling of tabular data, with support for
\code{data.frame}, \code{data.table}, and \code{tibble}. If a function is documented as accepting
"tabular data", it should work with any of these data structures. If a function is documented
as accepting only one of these, then it should only be used with that structure.
For example, some optimized \code{data.table} operations that perform in-place modifications only
work with \code{data.table} objects.
}

\seealso{
Useful links:
\itemize{
  \item \url{https://www.rtemis.org}
  \item \url{https://docs.rtemis.org/r/ml}
  \item \url{https://docs.rtemis.org/r/ml-api/}
  \item Report bugs at \url{https://github.com/rtemis-org/rtemis/issues}
}

}
\author{
\strong{Maintainer}: E.D. Gennatas \email{gennatas@gmail.com} (\href{https://orcid.org/0000-0001-9280-3609}{ORCID}) [copyright holder]

Authors:
\itemize{
  \item E.D. Gennatas \email{gennatas@gmail.com} (\href{https://orcid.org/0000-0001-9280-3609}{ORCID}) [copyright holder]
}

}


================================================
FILE: man/rtemis_colors.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rtemis_color_system.R
\name{rtemis_colors}
\alias{rtemis_colors}
\title{rtemis Color System}
\format{
A named list with the following elements:
\describe{
\item{red}{"kaimana red"}
\item{blue}{"kaimana light blue"}
\item{green}{"kaimana medium green"}
\item{orange}{"coastside orange"}
\item{teal}{"rtemis teal"}
\item{purple}{"rtemis purple"}
\item{magenta}{"rtemis magenta"}
\item{highlight_col}{"highlight color"}
\item{object}{"rtemis teal"}
\item{info}{"lmd burgundy"}
\item{outer}{"kaimana red"}
\item{tuner}{"coastside orange"}
}
}
\usage{
rtemis_colors
}
\description{
A named list of colors used consistently across all packages
in the rtemis ecosystem.
}
\details{
Colors are provided as hex strings.
}
\examples{
rtemis_colors[["orange"]]

rtemis_colors[["teal"]]
}
\author{
EDG
}


================================================
FILE: man/rtversion.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{rtversion}
\alias{rtversion}
\title{Get rtemis version and system info}
\usage{
rtversion()
}
\value{
List: rtemis version and system info, invisibly.
}
\description{
Get rtemis version and system info
}
\examples{
rtversion()
}
\author{
EDG
}


================================================
FILE: man/runifmat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{runifmat}
\alias{runifmat}
\title{Random Uniform Matrix}
\usage{
runifmat(
  nrow = 10,
  ncol = 10,
  min = 0,
  max = 1,
  return_df = FALSE,
  seed = NULL
)
}
\arguments{
\item{nrow}{Integer: Number of rows.}

\item{ncol}{Integer: Number of columns.}

\item{min}{Float: Min.}

\item{max}{Float: Max.}

\item{return_df}{Logical: If TRUE, return data.frame, otherwise matrix.}

\item{seed}{Integer: Set seed for \code{rnorm}.}
}
\value{
\code{matrix} or \code{data.frame}.
}
\description{
Create a matrix or data frame of defined dimensions, whose columns are random uniform vectors
}
\examples{
x <- runifmat(20, 5, min = 12, max = 18, return_df = TRUE, seed = 2026)
x
}
\author{
EDG
}


================================================
FILE: man/set_msg_sink.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/msg.R
\name{set_msg_sink}
\alias{set_msg_sink}
\title{Set the rtemis message sink}
\usage{
set_msg_sink(sink)
}
\arguments{
\item{sink}{Function or \code{NULL}.}
}
\value{
Previous sink (function or \code{NULL}), invisibly.
}
\description{
When set, \code{msg()}, \code{msg0()}, \code{msgstart()}, and \code{msgdone()} forward their
structured output through \code{sink} instead of writing to the R console. Used
by \code{rtemislive} to capture training-time messages and forward them over a
WebSocket connection. Pass \code{NULL} to restore default console output.
}
\details{
The sink function is called once per message with a single argument: a list
with fields
\itemize{
\item \code{text}: character. The formatted message body (no datetime prefix).
\item \code{caller}: character or \code{NA}. Calling function as identified by
\code{format_caller()}.
\item \code{ts}: character. Formatted timestamp (\code{"\%Y-\%m-\%d \%H:\%M:\%S"}).
\item \code{level}: character. One of \code{"info"} (\code{msg}/\code{msg0}), \code{"start"}
(\code{msgstart}), or \code{"done"} (\code{msgdone}).
}

When a sink is set, the console output path is \strong{skipped} for affected
calls. Errors thrown by the sink propagate to the caller of \code{msg()}.
}
\examples{
captured <- list()
set_msg_sink(function(m) captured[[length(captured) + 1L]] <<- m)
# msg("hello world")        # would append to `captured`
set_msg_sink(NULL)          # restore console output
}
\seealso{
\code{\link[=get_msg_sink]{get_msg_sink()}}, \code{\link[=with_msg_sink]{with_msg_sink()}}.
}
\author{
EDG
}


================================================
FILE: man/set_outcome.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_supervised.R
\name{set_outcome}
\alias{set_outcome}
\title{Move outcome to last column}
\usage{
set_outcome(dat, outcome_column)
}
\arguments{
\item{dat}{data.frame or similar.}

\item{outcome_column}{Character: Name of outcome column.}
}
\value{
object of same class as \code{data}
}
\description{
Move outcome to last column
}
\examples{
ir <- set_outcome(iris, "Sepal.Length")
head(ir)
}
\author{
EDG
}


================================================
FILE: man/setdiffsym.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{setdiffsym}
\alias{setdiffsym}
\title{Symmetric Set Difference}
\usage{
setdiffsym(x, y)
}
\arguments{
\item{x}{vector}

\item{y}{vector of same type as \code{x}}
}
\value{
Vector.
}
\description{
Symmetric Set Difference
}
\examples{
setdiff(1:10, 1:5)
setdiff(1:5, 1:10)
setdiffsym(1:10, 1:5)
setdiffsym(1:5, 1:10)
}
\author{
EDG
}


================================================
FILE: man/setup_CART.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_CART}
\alias{setup_CART}
\title{Setup CART Hyperparameters}
\usage{
setup_CART(
  cp = 0.01,
  maxdepth = 20L,
  minsplit = 2L,
  minbucket = 1L,
  prune_cp = NULL,
  method = "auto",
  model = TRUE,
  maxcompete = 4L,
  maxsurrogate = 5L,
  usesurrogate = 2L,
  surrogatestyle = 0L,
  xval = 0L,
  cost = NULL,
  ifw = FALSE
)
}
\arguments{
\item{cp}{(Tunable) Numeric: Complexity parameter.}

\item{maxdepth}{(Tunable) Integer: Maximum depth of tree.}

\item{minsplit}{(Tunable) Integer: Minimum number of observations in a node to split.}

\item{minbucket}{(Tunable) Integer: Minimum number of observations in a terminal node.}

\item{prune_cp}{(Tunable) Numeric: Complexity for cost-complexity pruning after tree is built}

\item{method}{String: Splitting method.}

\item{model}{Logical: If TRUE, return a model.}

\item{maxcompete}{Integer: Maximum number of competitive splits.}

\item{maxsurrogate}{Integer: Maximum number of surrogate splits.}

\item{usesurrogate}{Integer: Number of surrogate splits to use.}

\item{surrogatestyle}{Integer: Type of surrogate splits.}

\item{xval}{Integer: Number of cross-validation folds.}

\item{cost}{Numeric (>=0): One for each feature.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
CARTHyperparameters object.
}
\description{
Setup hyperparameters for CART training.
}
\details{
Get more information from \link[rpart:rpart]{rpart::rpart} and \link[rpart:rpart.control]{rpart::rpart.control}.
}
\examples{
cart_hyperparams <- setup_CART(cp = 0.01, maxdepth = 10L, ifw = TRUE)
cart_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_CMeans.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/09_ClusteringConfig.R
\name{setup_CMeans}
\alias{setup_CMeans}
\title{Setup CMeansConfig}
\usage{
setup_CMeans(
  k = 2L,
  max_iter = 100L,
  dist = c("euclidean", "manhattan"),
  method = c("cmeans", "ufcl"),
  m = 2,
  rate_par = NULL,
  weights = 1,
  control = list()
)
}
\arguments{
\item{k}{Integer: Number of clusters.}

\item{max_iter}{Integer: Maximum number of iterations.}

\item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.}

\item{method}{Character: "cmeans" - fuzzy c-means clustering; "ufcl": on-line update.}

\item{m}{Float (>1): Degree of fuzzification.}

\item{rate_par}{Float (0, 1): Learning rate for the online variant.}

\item{weights}{Float (>0): Case weights.}

\item{control}{List: Control config for clustering algorithm.}
}
\value{
CMeansConfig object.
}
\description{
Setup CMeansConfig
}
\examples{
cmeans_config <- setup_CMeans(k = 4L, dist = "euclidean")
cmeans_config
}
\author{
EDG
}


================================================
FILE: man/setup_DBSCAN.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/09_ClusteringConfig.R
\name{setup_DBSCAN}
\alias{setup_DBSCAN}
\title{Setup DBSCANConfig}
\usage{
setup_DBSCAN(
  eps = 0.5,
  min_points = 5L,
  weights = NULL,
  border_points = TRUE,
  search = c("kdtree", "linear", "dist"),
  bucket_size = 100L,
  split_rule = c("SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR"),
  approx = FALSE
)
}
\arguments{
\item{eps}{Float: Radius of neighborhood.}

\item{min_points}{Integer: Minimum number of points in a neighborhood to form a cluster.}

\item{weights}{Numeric vector: Weights for data points.}

\item{border_points}{Logical: If TRUE, assign border points to clusters.}

\item{search}{Character: Nearest neighbor search strategy: "kdtree", "linear", or "dist".}

\item{bucket_size}{Integer: Size of buckets for k-dtree search.}

\item{split_rule}{Character: Rule for splitting clusters: "SUGGEST", "STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR".}

\item{approx}{Logical: If TRUE, use approximate nearest neighbor search.}
}
\value{
DBSCANConfig object.
}
\description{
Setup DBSCANConfig
}
\examples{
dbscan_config <- setup_DBSCAN(eps = 0.5, min_points = 5L)
dbscan_config
}
\author{
EDG
}


================================================
FILE: man/setup_ExecutionConfig.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/01_ExecutionConfig.R
\name{setup_ExecutionConfig}
\alias{setup_ExecutionConfig}
\title{Setup Execution Configuration}
\usage{
setup_ExecutionConfig(
  backend = c("future", "mirai", "none"),
  n_workers = NULL,
  future_plan = NULL
)
}
\arguments{
\item{backend}{Character: Execution backend: "future", "mirai", or "none".}

\item{n_workers}{Integer: Number of workers for parallel execution. Only used if \verb{backend is "future"} or "mirai". Do not rely on the default value, set to an appropriate number depending
on your system.}

\item{future_plan}{Character: Future plan to use if \code{backend} is "future".}
}
\value{
\code{ExecutionConfig} object.
}
\description{
Setup Execution Configuration
}
\examples{
setup_ExecutionConfig(backend = "future", n_workers = 4L, future_plan = "multisession")
}
\author{
EDG
}


================================================
FILE: man/setup_GAM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_GAM}
\alias{setup_GAM}
\title{Setup GAM Hyperparameters}
\usage{
setup_GAM(k = 5L, ifw = FALSE)
}
\arguments{
\item{k}{(Tunable) Integer: Number of knots.}

\item{ifw}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
GAMHyperparameters object.
}
\description{
Setup hyperparameters for GAM training.
}
\details{
Get more information from \link[mgcv:gam]{mgcv::gam}.
}
\examples{
gam_hyperparams <- setup_GAM(k = 5L, ifw = FALSE)
gam_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_GLM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_GLM}
\alias{setup_GLM}
\title{Setup GLM Hyperparameters}
\usage{
setup_GLM(ifw = FALSE)
}
\arguments{
\item{ifw}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
GLMHyperparameters object.
}
\description{
Setup hyperparameters for GLM training.
}
\examples{
glm_hyperparams <- setup_GLM(ifw = TRUE)
glm_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_GLMNET.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_GLMNET}
\alias{setup_GLMNET}
\title{Setup GLMNET Hyperparameters}
\usage{
setup_GLMNET(
  alpha = 1,
  family = NULL,
  offset = NULL,
  which_lambda_cv = "lambda.1se",
  nlambda = 100L,
  lambda = NULL,
  penalty_factor = NULL,
  standardize = TRUE,
  intercept = TRUE,
  ifw = TRUE
)
}
\arguments{
\item{alpha}{(Tunable) Numeric: Mixing parameter.}

\item{family}{Character: Family for GLMNET.}

\item{offset}{Numeric: Offset for GLMNET.}

\item{which_lambda_cv}{Character: Which lambda to use for prediction:
"lambda.1se" or "lambda.min"}

\item{nlambda}{Positive integer: Number of lambda values.}

\item{lambda}{Numeric: Lambda values.}

\item{penalty_factor}{Numeric: Penalty factor for each feature.}

\item{standardize}{Logical: If TRUE, standardize features.}

\item{intercept}{Logical: If TRUE, include intercept.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
GLMNETHyperparameters object.
}
\description{
Setup hyperparameters for GLMNET training.
}
\details{
Get more information from \link[glmnet:glmnet]{glmnet::glmnet}.
}
\examples{
glm_hyperparams <- setup_GLMNET(alpha = 1, ifw = TRUE)
glm_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_GridSearch.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/06_Tuner.R
\name{setup_GridSearch}
\alias{setup_GridSearch}
\title{Setup Grid Search Config}
\usage{
setup_GridSearch(
  resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"),
  search_type = "exhaustive",
  randomize_p = NULL,
  metrics_aggregate_fn = "mean",
  metric = NULL,
  maximize = NULL
)
}
\arguments{
\item{resampler_config}{\code{ResamplerConfig} set by \link{setup_Resampler}.}

\item{search_type}{Character: "exhaustive" or "randomized". Type of
grid search to use. Exhaustive search will try all combinations of
config. Randomized will try a random sample of size
\code{randomize_p} * \verb{N of total combinations}}

\item{randomize_p}{Float (0, 1): For \code{search_type == "randomized"},
randomly test this proportion of combinations.}

\item{metrics_aggregate_fn}{Character: Name of function to use to aggregate error metrics.}

\item{metric}{Character: Metric to minimize or maximize.}

\item{maximize}{Logical: If TRUE, maximize \code{metric}, otherwise minimize it.}
}
\value{
A \code{GridSearchConfig} object.
}
\description{
Create a \code{GridSearchConfig} object that can be passed to \link{train}.
}
\examples{
gridsearch_config <- setup_GridSearch(
  resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold"),
  search_type = "exhaustive"
)
gridsearch_config
}
\author{
EDG
}


================================================
FILE: man/setup_HardCL.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/09_ClusteringConfig.R
\name{setup_HardCL}
\alias{setup_HardCL}
\title{Setup HardCLConfig}
\usage{
setup_HardCL(k = 3L, dist = c("euclidean", "manhattan"))
}
\arguments{
\item{k}{Number of clusters.}

\item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.}
}
\value{
HardCLConfig object.
}
\description{
Setup HardCLConfig
}
\examples{
hardcl_config <- setup_HardCL(k = 4L, dist = "euclidean")
hardcl_config
}
\author{
EDG
}


================================================
FILE: man/setup_ICA.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_ICA}
\alias{setup_ICA}
\title{setup_ICA}
\usage{
setup_ICA(
  k = 3L,
  type = c("parallel", "deflation"),
  fun = c("logcosh", "exp"),
  alpha = 1,
  row_norm = TRUE,
  maxit = 100L,
  tol = 1e-04
)
}
\arguments{
\item{k}{Integer: Number of components.}

\item{type}{Character: Type of ICA: "parallel" or "deflation".}

\item{fun}{Character: ICA function: "logcosh", "exp".}

\item{alpha}{Numeric [1, 2]: Used in approximation to neg-entropy with \code{fun = "logcosh"}.}

\item{row_norm}{Logical: If TRUE, normalize rows of \code{x} before ICA.}

\item{maxit}{Integer: Maximum number of iterations.}

\item{tol}{Numeric: Tolerance.}
}
\value{
ICAConfig object.
}
\description{
Setup ICA config.
}
\examples{
ica_config <- setup_ICA(k = 3L)
ica_config
}
\author{
EDG
}


================================================
FILE: man/setup_Isomap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_Isomap}
\alias{setup_Isomap}
\title{Setup Isomap config.}
\usage{
setup_Isomap(
  k = 2L,
  dist_method = c("euclidean", "manhattan"),
  nsd = 0L,
  path = c("shortest", "extended")
)
}
\arguments{
\item{k}{Integer: Number of components.}

\item{dist_method}{Character: Distance method.}

\item{nsd}{Integer: Number of shortest dissimilarities retained.}

\item{path}{Character: Path argument for \code{vegan::isomap}.}
}
\value{
IsomapConfig object.
}
\description{
Setup Isomap config.
}
\examples{
isomap_config <- setup_Isomap(k = 3L)
isomap_config
}
\author{
EDG
}


================================================
FILE: man/setup_Isotonic.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_Isotonic}
\alias{setup_Isotonic}
\title{Setup Isotonic Hyperparameters}
\usage{
setup_Isotonic(ifw = FALSE)
}
\arguments{
\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
IsotonicHyperparameters object.
}
\description{
Setup hyperparameters for Isotonic Regression.
}
\details{
There are not hyperparameters for this algorithm at this moment.
}
\examples{
isotonic_hyperparams <- setup_Isotonic(ifw = TRUE)
isotonic_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_KMeans.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/09_ClusteringConfig.R
\name{setup_KMeans}
\alias{setup_KMeans}
\title{Setup KMeansConfig}
\usage{
setup_KMeans(k = 3L, dist = c("euclidean", "manhattan"))
}
\arguments{
\item{k}{Number of clusters.}

\item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.}
}
\value{
KMeansConfig object.
}
\description{
Setup KMeansConfig
}
\examples{
kmeans_config <- setup_KMeans(k = 4L, dist = "euclidean")
kmeans_config
}
\author{
EDG
}


================================================
FILE: man/setup_LightCART.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_LightCART}
\alias{setup_LightCART}
\title{Setup LightCART Hyperparameters}
\usage{
setup_LightCART(
  num_leaves = 32L,
  max_depth = -1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  min_data_in_leaf = 20L,
  max_cat_threshold = 32L,
  min_data_per_group = 100L,
  linear_tree = FALSE,
  objective = NULL,
  ifw = FALSE
)
}
\arguments{
\item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.}

\item{max_depth}{(Tunable) Integer: Maximum depth of trees.}

\item{lambda_l1}{(Tunable) Numeric: L1 regularization.}

\item{lambda_l2}{(Tunable) Numeric: L2 regularization.}

\item{min_data_in_leaf}{(Tunable) Positive integer: Minimum number of data in a leaf.}

\item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.}

\item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.}

\item{linear_tree}{(Tunable) Logical: If TRUE, use linear trees.}

\item{objective}{Character: Objective function.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
LightCARTHyperparameters object.
}
\description{
Setup hyperparameters for LightCART training.
}
\details{
Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}.
}
\examples{
lightcart_hyperparams <- setup_LightCART(num_leaves = 32L, ifw = FALSE)
lightcart_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_LightGBM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_LightGBM}
\alias{setup_LightGBM}
\title{Setup LightGBM Hyperparameters}
\usage{
setup_LightGBM(
  max_nrounds = 1000L,
  force_nrounds = NULL,
  early_stopping_rounds = 10L,
  num_leaves = 8L,
  max_depth = -1L,
  learning_rate = 0.01,
  feature_fraction = 1,
  subsample = 1,
  subsample_freq = 1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  max_cat_threshold = 32L,
  min_data_per_group = 32L,
  linear_tree = FALSE,
  ifw = FALSE,
  objective = NULL,
  device_type = "cpu",
  tree_learner = "serial",
  force_col_wise = TRUE
)
}
\arguments{
\item{max_nrounds}{Positive integer: Maximum number of boosting rounds.}

\item{force_nrounds}{Positive integer: Use this many boosting rounds. Disable search for nrounds.}

\item{early_stopping_rounds}{Positive integer: Number of rounds without improvement to stop training.}

\item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.}

\item{max_depth}{(Tunable) Integer: Maximum depth of trees.}

\item{learning_rate}{(Tunable) Numeric: Learning rate.}

\item{feature_fraction}{(Tunable) Numeric: Fraction of features to use.}

\item{subsample}{(Tunable) Numeric: Fraction of data to use.}

\item{subsample_freq}{(Tunable) Positive integer: Frequency of subsample.}

\item{lambda_l1}{(Tunable) Numeric: L1 regularization.}

\item{lambda_l2}{(Tunable) Numeric: L2 regularization.}

\item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.}

\item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.}

\item{linear_tree}{Logical: If TRUE, use linear trees.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}

\item{objective}{Character: Objective function.}

\item{device_type}{Character: "cpu" or "gpu".}

\item{tree_learner}{Character: "serial", "feature", "data", or "voting".}

\item{force_col_wise}{Logical: Use only with CPU - If TRUE, force col-wise histogram building.}
}
\value{
LightGBMHyperparameters object.
}
\description{
Setup hyperparameters for LightGBM training.
}
\details{
Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}.
}
\examples{
lightgbm_hyperparams <- setup_LightGBM(
  max_nrounds = 500L,
  learning_rate = c(0.001, 0.01, 0.05), ifw = TRUE
)
lightgbm_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_LightRF.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_LightRF}
\alias{setup_LightRF}
\title{Setup LightRF Hyperparameters}
\usage{
setup_LightRF(
  nrounds = 500L,
  num_leaves = 4096L,
  max_depth = -1L,
  feature_fraction = 0.7,
  subsample = 0.623,
  lambda_l1 = 0,
  lambda_l2 = 0,
  max_cat_threshold = 32L,
  min_data_per_group = 32L,
  linear_tree = FALSE,
  ifw = FALSE,
  objective = NULL,
  device_type = "cpu",
  tree_learner = "serial",
  force_col_wise = TRUE
)
}
\arguments{
\item{nrounds}{(Tunable) Positive integer: Number of boosting rounds.}

\item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.}

\item{max_depth}{(Tunable) Integer: Maximum depth of trees.}

\item{feature_fraction}{(Tunable) Numeric: Fraction of features to use.}

\item{subsample}{(Tunable) Numeric: Fraction of data to use.}

\item{lambda_l1}{(Tunable) Numeric: L1 regularization.}

\item{lambda_l2}{(Tunable) Numeric: L2 regularization.}

\item{max_cat_threshold}{(Tunable) Positive integer: Maximum number of categories for categorical features.}

\item{min_data_per_group}{(Tunable) Positive integer: Minimum number of observations per categorical group.}

\item{linear_tree}{Logical: If TRUE, use linear trees.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}

\item{objective}{Character: Objective function.}

\item{device_type}{Character: "cpu" or "gpu".}

\item{tree_learner}{Character: "serial", "feature", "data", or "voting".}

\item{force_col_wise}{Logical: Use only with CPU - If TRUE, force col-wise histogram building.}
}
\value{
LightRFHyperparameters object.
}
\description{
Setup hyperparameters for LightRF training.
}
\details{
Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}.
Note that hyperparameters subsample_freq and early_stopping_rounds are fixed,
and cannot be set because they are what makes \code{lightgbm} train a random forest.
These can all be set when training gradient boosting with LightGBM.
}
\examples{
lightrf_hyperparams <- setup_LightRF(nrounds = 1000L, ifw = FALSE)
lightrf_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_LightRuleFit.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_LightRuleFit}
\alias{setup_LightRuleFit}
\title{Setup LightRuleFit Hyperparameters}
\usage{
setup_LightRuleFit(
  nrounds = 200L,
  num_leaves = 32L,
  max_depth = 4L,
  learning_rate = 0.1,
  subsample = 0.666,
  subsample_freq = 1L,
  lambda_l1 = 0,
  lambda_l2 = 0,
  objective = NULL,
  ifw_lightgbm = FALSE,
  alpha = 1,
  lambda = NULL,
  ifw_glmnet = FALSE,
  ifw = FALSE
)
}
\arguments{
\item{nrounds}{(Tunable) Positive integer: Number of boosting rounds.}

\item{num_leaves}{(Tunable) Positive integer: Maximum number of leaves in one tree.}

\item{max_depth}{(Tunable) Integer: Maximum depth of trees.}

\item{learning_rate}{(Tunable) Numeric: Learning rate.}

\item{subsample}{(Tunable) Numeric: Fraction of data to use.}

\item{subsample_freq}{(Tunable) Positive integer: Frequency of subsample.}

\item{lambda_l1}{(Tunable) Numeric: L1 regularization.}

\item{lambda_l2}{(Tunable) Numeric: L2 regularization.}

\item{objective}{Character: Objective function.}

\item{ifw_lightgbm}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the LightGBM
step.}

\item{alpha}{(Tunable) Numeric: Alpha for GLMNET.}

\item{lambda}{Numeric: Lambda for GLMNET.}

\item{ifw_glmnet}{(Tunable) Logical: If TRUE, use Inverse Frequency Weighting in the GLMNET step.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification. This applies IFW
to both LightGBM and GLMNET.}
}
\value{
LightRuleFitHyperparameters object.
}
\description{
Setup hyperparameters for LightRuleFit training.
}
\details{
Get more information from \link[lightgbm:lgb.train]{lightgbm::lgb.train}.
}
\examples{
lightrulefit_hyperparams <- setup_LightRuleFit(nrounds = 300L, max_depth = 3L)
lightrulefit_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_LinearSVM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_LinearSVM}
\alias{setup_LinearSVM}
\title{Setup LinearSVM Hyperparameters}
\usage{
setup_LinearSVM(cost = 1, ifw = FALSE)
}
\arguments{
\item{cost}{(Tunable) Numeric: Cost of constraints violation.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
LinearSVMHyperparameters object.
}
\description{
Setup hyperparameters for LinearSVM training.
}
\details{
Get more information from \link[e1071:svm]{e1071::svm}.
}
\examples{
linear_svm_hyperparams <- setup_LinearSVM(cost = 0.5, ifw = TRUE)
linear_svm_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_NMF.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_NMF}
\alias{setup_NMF}
\title{Setup NMF config.}
\usage{
setup_NMF(k = 2L, method = "brunet", nrun = if (length(k) > 1L) 30L else 1L)
}
\arguments{
\item{k}{Integer: Number of components.}

\item{method}{Character: NMF method. See \code{NMF::nmf}.}

\item{nrun}{Integer: Number of runs to perform.}
}
\value{
NMFConfig object.
}
\description{
Setup NMF config.
}
\examples{
nmf_config <- setup_NMF(k = 3L)
nmf_config
}
\author{
EDG
}


================================================
FILE: man/setup_NeuralGas.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/09_ClusteringConfig.R
\name{setup_NeuralGas}
\alias{setup_NeuralGas}
\title{Setup NeuralGasConfig}
\usage{
setup_NeuralGas(k = 3L, dist = c("euclidean", "manhattan"))
}
\arguments{
\item{k}{Number of clusters.}

\item{dist}{Character: Distance measure to use: 'euclidean' or 'manhattan'.}
}
\value{
NeuralGasConfig object.
}
\description{
Setup NeuralGasConfig
}
\examples{
neuralgas_config <- setup_NeuralGas(k = 4L, dist = "euclidean")
neuralgas_config
}
\author{
EDG
}


================================================
FILE: man/setup_PCA.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_PCA}
\alias{setup_PCA}
\title{Setup PCA config.}
\usage{
setup_PCA(k = 3L, center = TRUE, scale = TRUE, tol = NULL)
}
\arguments{
\item{k}{Integer: Number of components. (passed to \code{prcomp} \code{rank.})}

\item{center}{Logical: If TRUE, center the data.}

\item{scale}{Logical: If TRUE, scale the data.}

\item{tol}{Numeric: Tolerance.}
}
\value{
PCAConfig object.
}
\description{
Setup PCA config.
}
\examples{
pca_config <- setup_PCA(k = 3L)
pca_config
}
\author{
EDG
}


================================================
FILE: man/setup_Preprocessor.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/04_Preprocessor.R
\name{setup_Preprocessor}
\alias{setup_Preprocessor}
\title{Setup Preprocessor}
\usage{
setup_Preprocessor(
  complete_cases = FALSE,
  remove_features_thres = NULL,
  remove_cases_thres = NULL,
  missingness = FALSE,
  impute = FALSE,
  impute_type = c("missRanger", "micePMM", "meanMode"),
  impute_missRanger_params = list(pmm.k = 3, maxiter = 10, num.trees = 500),
  impute_discrete = "get_mode",
  impute_continuous = "mean",
  integer2factor = FALSE,
  integer2numeric = FALSE,
  logical2factor = FALSE,
  logical2numeric = FALSE,
  numeric2factor = FALSE,
  numeric2factor_levels = NULL,
  numeric_cut_n = 0,
  numeric_cut_labels = FALSE,
  numeric_quant_n = 0,
  numeric_quant_NAonly = FALSE,
  unique_len2factor = 0,
  character2factor = FALSE,
  factorNA2missing = FALSE,
  factorNA2missing_level = "missing",
  factor2integer = FALSE,
  factor2integer_startat0 = TRUE,
  scale = FALSE,
  center = scale,
  scale_centers = NULL,
  scale_coefficients = NULL,
  remove_constants = FALSE,
  remove_constants_skip_missing = TRUE,
  remove_features = NULL,
  remove_duplicates = FALSE,
  one_hot = FALSE,
  one_hot_levels = NULL,
  add_date_features = FALSE,
  date_features = c("weekday", "month", "year"),
  add_holidays = FALSE,
  exclude = NULL
)
}
\arguments{
\item{complete_cases}{Logical: If TRUE, only retain complete cases (no missing data).}

\item{remove_features_thres}{Float (0, 1): Remove features with missing
values in >= to this fraction of cases.}

\item{remove_cases_thres}{Float (0, 1): Remove cases with >= to this fraction
of missing features.}

\item{missingness}{Logical: If TRUE, generate new boolean columns for each
feature with missing values, indicating which cases were missing data.}

\item{impute}{Logical: If TRUE, impute missing cases. See \code{impute_discrete} and
\code{impute_continuous}.}

\item{impute_type}{Character: Package to use for imputation.}

\item{impute_missRanger_params}{Named list with elements "pmm.k" and
"maxiter", which are passed to \code{missRanger::missRanger}. \code{pmm.k}
greater than 0 results in predictive mean matching. Default \code{pmm.k = 3}
\code{maxiter = 10} \code{num.trees = 500}. Reduce \code{num.trees} for
faster imputation especially in large datasets. Set \code{pmm.k = 0} to
disable predictive mean matching.}

\item{impute_discrete}{Character: Name of function that returns single value: How to impute
discrete variables for \code{impute_type = "meanMode"}.}

\item{impute_continuous}{Character: Name of function that returns single value: How to impute
continuous variables for \code{impute_type = "meanMode"}.}

\item{integer2factor}{Logical: If TRUE, convert all integers to factors. This includes
\code{bit64::integer64} columns.}

\item{integer2numeric}{Logical: If TRUE, convert all integers to numeric
(will only work if \code{integer2factor = FALSE}). This includes
\code{bit64::integer64} columns.}

\item{logical2factor}{Logical: If TRUE, convert all logical variables to
factors.}

\item{logical2numeric}{Logical: If TRUE, convert all logical variables to
numeric.}

\item{numeric2factor}{Logical: If TRUE, convert all numeric variables to
factors.}

\item{numeric2factor_levels}{Character vector: Optional - will be passed to
\code{levels} arg of \code{factor()} if \code{numeric2factor = TRUE}. For advanced/
specific use cases; need to know unique values of numeric vector(s) and given all
numeric vars have same unique values.}

\item{numeric_cut_n}{Integer: If > 0, convert all numeric variables to factors by
binning using \code{base::cut} with \code{breaks} equal to this number.}

\item{numeric_cut_labels}{Logical: The \code{labels} argument of \link[base:cut]{base::cut}.}

\item{numeric_quant_n}{Integer: If > 0, convert all numeric variables to factors by
binning using \code{base::cut} with \code{breaks} equal to this number of quantiles.
produced using \code{stats::quantile}.}

\item{numeric_quant_NAonly}{Logical: If TRUE, only bin numeric variables with
missing values.}

\item{unique_len2factor}{Integer (>=2): Convert all variables with less
than or equal to this number of unique values to factors.
For example, if binary variables are encoded with 1, 2, you could use
\code{unique_len2factor = 2} to convert them to factors.}

\item{character2factor}{Logical: If TRUE, convert all character variables to
factors.}

\item{factorNA2missing}{Logical: If TRUE, make NA values in factors be of
level \code{factorNA2missing_level}. In many cases this is the preferred way
to handle missing data in categorical variables. Note that since this step
is performed before imputation, you can use this option to handle missing
data in categorical variables and impute numeric variables in the same
\code{preprocess} call.}

\item{factorNA2missing_level}{Character: Name of level if
\code{factorNA2missing = TRUE}.}

\item{factor2integer}{Logical: If TRUE, convert all factors to integers.}

\item{factor2integer_startat0}{Logical: If TRUE, start integer coding at 0.}

\item{scale}{Logical: If TRUE, scale columns of \code{x}.}

\item{center}{Logical: If TRUE, center columns of \code{x}. Note that by
default it is the same as \code{scale}.}

\item{scale_centers}{Named vector: Centering values for each feature.}

\item{scale_coefficients}{Named vector: Scaling values for each feature.}

\item{remove_constants}{Logical: If TRUE, remove constant columns.}

\item{remove_constants_skip_missing}{Logical: If TRUE, skip missing values, before
checking if feature is constant.}

\item{remove_features}{Character vector: Features to remove.}

\item{remove_duplicates}{Logical: If TRUE, remove duplicate cases.}

\item{one_hot}{Logical: If TRUE, convert all factors using one-hot encoding.}

\item{one_hot_levels}{List: Named list of the form "feature_name" = "levels". Used when applying
one-hot encoding to validation or test data using \code{Preprocessor}.}

\item{add_date_features}{Logical: If TRUE, extract date features from date columns.}

\item{date_features}{Character vector: Features to extract from dates.}

\item{add_holidays}{Logical: If TRUE, extract holidays from date columns.}

\item{exclude}{Integer, vector: Exclude these columns from preprocessing.}
}
\value{
\code{PreprocessorConfig} object.
}
\description{
Creates a \code{PreprocessorConfig} object, which can be used in \link{preprocess}.
}
\section{Order of Operations}{

\itemize{
\item keep complete cases only
\item remove constants
\item remove duplicates
\item remove cases by missingness threshold
\item remove features by missingness threshold
\item integer to factor
\item integer to numeric
\item logical to factor
\item logical to numeric
\item numeric to factor
\item cut numeric to n bins
\item cut numeric to n quantiles
\item numeric with less than N unique values to factor
\item character to factor
\item factor NA to named level
\item add missingness column
\item impute
\item scale and/or center
\item one-hot encoding
}
}

\examples{
preproc_config <- setup_Preprocessor(factorNA2missing = TRUE)
preproc_config
}
\author{
EDG
}


================================================
FILE: man/setup_RadialSVM.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_RadialSVM}
\alias{setup_RadialSVM}
\title{Setup RadialSVM Hyperparameters}
\usage{
setup_RadialSVM(cost = 1, gamma = 0.01, ifw = FALSE)
}
\arguments{
\item{cost}{(Tunable) Numeric: Cost of constraints violation.}

\item{gamma}{(Tunable) Numeric: Kernel coefficient.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
RadialSVMHyperparameters object.
}
\description{
Setup hyperparameters for RadialSVM training.
}
\details{
Get more information from \link[e1071:svm]{e1071::svm}.
}
\examples{
radial_svm_hyperparams <- setup_RadialSVM(cost = 10, gamma = 0.1, ifw = TRUE)
radial_svm_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_Ranger.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_Ranger}
\alias{setup_Ranger}
\title{Setup Ranger Hyperparameters}
\usage{
setup_Ranger(
  num_trees = 500,
  mtry = NULL,
  importance = "impurity",
  write_forest = TRUE,
  probability = FALSE,
  min_node_size = NULL,
  min_bucket = NULL,
  max_depth = NULL,
  replace = TRUE,
  sample_fraction = ifelse(replace, 1, 0.632),
  case_weights = NULL,
  class_weights = NULL,
  splitrule = NULL,
  num_random_splits = 1,
  alpha = 0.5,
  minprop = 0.1,
  poisson_tau = 1,
  split_select_weights = NULL,
  always_split_variables = NULL,
  respect_unordered_factors = NULL,
  scale_permutation_importance = FALSE,
  local_importance = FALSE,
  regularization_factor = 1,
  regularization_usedepth = FALSE,
  keep_inbag = FALSE,
  inbag = NULL,
  holdout = FALSE,
  quantreg = FALSE,
  time_interest = NULL,
  oob_error = TRUE,
  save_memory = FALSE,
  verbose = TRUE,
  node_stats = FALSE,
  seed = NULL,
  na_action = "na.learn",
  ifw = FALSE
)
}
\arguments{
\item{num_trees}{(Tunable) Positive integer: Number of trees.}

\item{mtry}{(Tunable) Positive integer: Number of features to consider at each split.}

\item{importance}{Character: Variable importance mode. "none", "impurity", "impurity_corrected", "permutation".
The "impurity" measure is the Gini index for classification, the variance of the responses for regression.}

\item{write_forest}{Logical: Save ranger.forest object, required for prediction. Set to FALSE to reduce memory usage if no prediction intended.}

\item{probability}{Logical: Grow a probability forest as in Malley et al. (2012). For classification only.}

\item{min_node_size}{(Tunable) Positive integer: Minimal node size. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability.}

\item{min_bucket}{Positive integer: Minimal number of samples in a terminal node. Only for survival. Deprecated in favor of min_node_size.}

\item{max_depth}{(Tunable) Positive integer: Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree).}

\item{replace}{Logical: Sample with replacement.}

\item{sample_fraction}{(Tunable) Numeric: Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement.}

\item{case_weights}{Numeric vector: Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees.}

\item{class_weights}{Numeric vector: Weights for the outcome classes for classification. Vector of the same length as the number of classes, with names corresponding to the class labels.}

\item{splitrule}{(Tunable) Character: Splitting rule. For classification: "gini", "extratrees", "hellinger". For regression: "variance", "extratrees", "maxstat", "beta". For survival: "logrank", "extratrees", "C", "maxstat".}

\item{num_random_splits}{(Tunable) Positive integer: For "extratrees" splitrule: Number of random splits to consider for each candidate splitting variable.}

\item{alpha}{(Tunable) Numeric: For "maxstat" splitrule: significance threshold to allow splitting.}

\item{minprop}{(Tunable) Numeric: For "maxstat" splitrule: lower quantile of covariate distribution to be considered for splitting.}

\item{poisson_tau}{Numeric: For "poisson" regression splitrule: tau parameter for Poisson regression.}

\item{split_select_weights}{Numeric vector: Numeric vector with weights between 0 and 1, representing the probability to select variables for splitting. Alternatively, a list of size num_trees, with one weight vector per tree.}

\item{always_split_variables}{Character vector: Character vector with variable names to be always selected in addition to the mtry variables tried for splitting.}

\item{respect_unordered_factors}{Character or logical: Handling of unordered factor covariates. For "partition" all 2^(k-1)-1 possible partitions are considered for splitting, where k is the number of factor levels. For "ignore", all factor levels are ordered by their first occurrence in the data. For "order", all factor levels are ordered by their average response. TRUE corresponds to "partition" for the randomForest package compatibility.}

\item{scale_permutation_importance}{Logical: Scale permutation importance by standard error as in (Breiman 2001). Only applicable if permutation variable importance mode selected.}

\item{local_importance}{Logical: For permutation variable importance, use local importance as in Breiman (2001) and Liaw & Wiener (2002).}

\item{regularization_factor}{(Tunable) Numeric: Regularization factor. Penalize variables with many split points. Requires splitrule = "variance".}

\item{regularization_usedepth}{Logical: Use regularization factor with node depth. Requires regularization_factor.}

\item{keep_inbag}{Logical: Save how often observations are in-bag in each tree. These will be used for (local) variable importance if inbag.counts in predict() is NULL.}

\item{inbag}{List: Manually set observations per tree. List of size num_trees, containing inbag counts for each observation. Can be used for stratified sampling.}

\item{holdout}{Logical: Hold-out mode. Hold-out all samples with case weight 0 and use these for variable importance and prediction error.}

\item{quantreg}{Logical: Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). For regression only. Set keep_inbag = TRUE to prepare out-of-bag quantile prediction.}

\item{time_interest}{Numeric: For GWAS data: SNP with this number will be used as time variable. Only for survival. Deprecated, use time.var in formula instead.}

\item{oob_error}{Logical: Compute OOB prediction error. Set to FALSE to save computation time if only the forest is needed.}

\item{save_memory}{Logical: Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.}

\item{verbose}{Logical: Show computation status and estimated runtime.}

\item{node_stats}{Logical: Save additional node statistics. Only terminal nodes for now.}

\item{seed}{Positive integer: Random seed. Default is NULL, which generates the seed from R. Set to 0 to ignore the R seed.}

\item{na_action}{Character: Action to take if the data contains missing values. "na.learn" uses observations with missing values in splitting, treating missing values as a separate category.}

\item{ifw}{Logical: Inverse Frequency Weighting for classification. If TRUE, class weights are set inversely proportional to the class frequencies.}
}
\value{
RangerHyperparameters object.
}
\description{
Setup hyperparameters for Ranger Random Forest training.
}
\details{
Get more information from \link[ranger:ranger]{ranger::ranger}.
}
\examples{
ranger_hyperparams <- setup_Ranger(num_trees = 1000L, ifw = FALSE)
ranger_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_Resampler.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/05_Resampler.R
\name{setup_Resampler}
\alias{setup_Resampler}
\title{Setup Resampler}
\usage{
setup_Resampler(
  n_resamples = 10L,
  type = c("KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"),
  stratify_var = NULL,
  train_p = 0.75,
  strat_n_bins = 4L,
  target_length = NULL,
  id_strat = NULL,
  seed = NULL,
  verbosity = 1L
)
}
\arguments{
\item{n_resamples}{Integer: Number of resamples to make.}

\item{type}{Character: Type of resampler: "KFold", "StratSub", "StratBoot", "Bootstrap", "LOOCV"}

\item{stratify_var}{Character: Variable to stratify by.}

\item{train_p}{Float: Training set percentage.}

\item{strat_n_bins}{Integer: Number of bins to stratify by.}

\item{target_length}{Integer: Target length for stratified bootstraps.}

\item{id_strat}{Integer: Vector of indices to stratify by. These may be, for example, case IDs
if your dataset contains repeated measurements. By specifying this vector, you can ensure that
each case can only be present in the training or test set, but not both.}

\item{seed}{Integer: Random seed.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
ResamplerConfig object.
}
\description{
Setup Resampler
}
\examples{
tenfold_resampler <- setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L)
tenfold_resampler
}
\author{
EDG
}


================================================
FILE: man/setup_SuperConfig.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/14_SuperConfig.R
\name{setup_SuperConfig}
\alias{setup_SuperConfig}
\title{Setup SuperConfig}
\usage{
setup_SuperConfig(
  dat_training_path,
  dat_validation_path = NULL,
  dat_test_path = NULL,
  weights = NULL,
  preprocessor_config = NULL,
  algorithm = NULL,
  hyperparameters = NULL,
  tuner_config = NULL,
  outer_resampling_config = NULL,
  execution_config = setup_ExecutionConfig(),
  question = NULL,
  outdir = "results/",
  verbosity = 1L
)
}
\arguments{
\item{dat_training_path}{Character: Path to training data file.}

\item{dat_validation_path}{Character: Path to validation data file.}

\item{dat_test_path}{Character: Path to test data file.}

\item{weights}{Optional Character: Column name in training data to use as observation weights.
If NULL, no weights are used.}

\item{preprocessor_config}{\code{PreprocessorConfig} object: Configuration for data preprocessing.}

\item{algorithm}{Character: Algorithm to use for training.}

\item{hyperparameters}{\code{Hyperparameters} object: Configuration for model hyperparameters.}

\item{tuner_config}{\code{TunerConfig} object: Configuration for hyperparameter tuning.}

\item{outer_resampling_config}{\code{ResamplerConfig} object: Configuration for outer res
resampling during model training.}

\item{execution_config}{\code{ExecutionConfig} object: Configuration for execution settings. Setup
with \link{setup_ExecutionConfig}.}

\item{question}{Character: Question to answer with the supervised learning analysis.}

\item{outdir}{Character: Output directory for results.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{SuperConfig} object.
}
\description{
Setup \code{SuperConfig} object.
}
\examples{
sc <- setup_SuperConfig(
  dat_training_path = "train.csv",
  preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
  algorithm = "LightRF",
  hyperparameters = setup_LightRF(),
  tuner_config = setup_GridSearch(),
  outer_resampling_config = setup_Resampler(),
  execution_config = setup_ExecutionConfig(),
  question = "Can we tell iris species apart given their measurements?",
  outdir = "models/"
)
}
\author{
EDG
}


================================================
FILE: man/setup_SuperConfigLive.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/14_SuperConfig.R
\name{setup_SuperConfigLive}
\alias{setup_SuperConfigLive}
\title{Setup SuperConfigLive}
\usage{
setup_SuperConfigLive(
  dat_training,
  dat_validation = NULL,
  dat_test = NULL,
  weights = NULL,
  preprocessor_config = NULL,
  algorithm = NULL,
  hyperparameters = NULL,
  tuner_config = NULL,
  outer_resampling_config = NULL,
  execution_config = setup_ExecutionConfig(),
  question = NULL,
  outdir = NULL,
  verbosity = 1L
)
}
\arguments{
\item{dat_training}{data.frame or data.table. Training data.}

\item{dat_validation}{data.frame, data.table, or \code{NULL}.}

\item{dat_test}{data.frame, data.table, or \code{NULL}.}

\item{weights}{Character or \code{NULL}. Column name in \code{dat_training} used
as observation weights.}

\item{preprocessor_config, algorithm, hyperparameters, tuner_config, outer_resampling_config, execution_config, question, verbosity}{See \link{setup_SuperConfig}.}

\item{outdir}{Character or \code{NULL}. Output directory; \code{NULL} (the
default) means "do not write to disk" (the rtemislive case).}
}
\value{
\code{SuperConfigLive} object.
}
\description{
Build a \code{SuperConfigLive} — same shape as \link{setup_SuperConfig} but with
in-memory tabular data instead of file paths.
}
\author{
EDG
}


================================================
FILE: man/setup_TabNet.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/02_Hyperparameters.R
\name{setup_TabNet}
\alias{setup_TabNet}
\title{Setup TabNet Hyperparameters}
\usage{
setup_TabNet(
  batch_size = 1024^2,
  penalty = 0.001,
  clip_value = NULL,
  loss = "auto",
  epochs = 50L,
  drop_last = FALSE,
  decision_width = NULL,
  attention_width = NULL,
  num_steps = 3L,
  feature_reusage = 1.3,
  mask_type = "sparsemax",
  virtual_batch_size = 256^2,
  valid_split = 0,
  learn_rate = 0.02,
  optimizer = "adam",
  lr_scheduler = NULL,
  lr_decay = 0.1,
  step_size = 30,
  checkpoint_epochs = 10L,
  cat_emb_dim = 1L,
  num_independent = 2L,
  num_shared = 2L,
  num_independent_decoder = 1L,
  num_shared_decoder = 1L,
  momentum = 0.02,
  pretraining_ratio = 0.5,
  device = "auto",
  importance_sample_size = NULL,
  early_stopping_monitor = "auto",
  early_stopping_tolerance = 0,
  early_stopping_patience = 0,
  num_workers = 0L,
  skip_importance = FALSE,
  ifw = FALSE
)
}
\arguments{
\item{batch_size}{(Tunable) Positive integer: Batch size.}

\item{penalty}{(Tunable) Numeric: Regularization penalty.}

\item{clip_value}{Numeric: Clip value.}

\item{loss}{Character: Loss function.}

\item{epochs}{(Tunable) Positive integer: Number of epochs.}

\item{drop_last}{Logical: If TRUE, drop last batch.}

\item{decision_width}{(Tunable) Positive integer: Decision width.}

\item{attention_width}{(Tunable) Positive integer: Attention width.}

\item{num_steps}{(Tunable) Positive integer: Number of steps.}

\item{feature_reusage}{(Tunable) Numeric: Feature reusage.}

\item{mask_type}{Character: Mask type.}

\item{virtual_batch_size}{(Tunable) Positive integer: Virtual batch size.}

\item{valid_split}{Numeric: Validation split.}

\item{learn_rate}{(Tunable) Numeric: Learning rate.}

\item{optimizer}{Character or torch function: Optimizer.}

\item{lr_scheduler}{Character or torch function: "step", "reduce_on_plateau".}

\item{lr_decay}{Numeric: Learning rate decay.}

\item{step_size}{Positive integer: Step size.}

\item{checkpoint_epochs}{(Tunable) Positive integer: Checkpoint epochs.}

\item{cat_emb_dim}{(Tunable) Positive integer: Categorical embedding dimension.}

\item{num_independent}{(Tunable) Positive integer: Number of independent Gated Linear Units (GLU)
at each step of the encoder.}

\item{num_shared}{(Tunable) Positive integer: Number of shared Gated Linear Units (GLU) at each
step of the encoder.}

\item{num_independent_decoder}{(Tunable) Positive integer: Number of independent GLU layers for
pretraining.}

\item{num_shared_decoder}{(Tunable) Positive integer: Number of shared GLU layers for
pretraining.}

\item{momentum}{(Tunable) Numeric: Momentum.}

\item{pretraining_ratio}{(Tunable) Numeric: Pretraining ratio.}

\item{device}{Character: Device "cpu" or "cuda".}

\item{importance_sample_size}{Positive integer: Importance sample size.}

\item{early_stopping_monitor}{Character: Early stopping monitor. "valid_loss", "train_loss",
"auto".}

\item{early_stopping_tolerance}{Numeric: Minimum relative improvement to reset the patience
counter.}

\item{early_stopping_patience}{Positive integer: Number of epochs without improving before
stopping.}

\item{num_workers}{Positive integer: Number of subprocesses for data loacding.}

\item{skip_importance}{Logical: If TRUE, skip importance calculation.}

\item{ifw}{Logical: If TRUE, use Inverse Frequency Weighting in classification.}
}
\value{
TabNetHyperparameters object.
}
\description{
Setup hyperparameters for TabNet training.
}
\examples{
tabnet_hyperparams <- setup_TabNet(epochs = 100L, learn_rate = 0.01)
tabnet_hyperparams
}
\author{
EDG
}


================================================
FILE: man/setup_UMAP.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_UMAP}
\alias{setup_UMAP}
\title{Setup UMAP config.}
\usage{
setup_UMAP(
  k = 2L,
  n_neighbors = 15L,
  init = "spectral",
  metric = c("euclidean", "cosine", "manhattan", "hamming", "categorical"),
  n_epochs = NULL,
  learning_rate = 1,
  scale = TRUE
)
}
\arguments{
\item{k}{Integer: Number of components.}

\item{n_neighbors}{Integer: Number of keighbors.}

\item{init}{Character: Initialization type. See \verb{uwot::umap "init"}.}

\item{metric}{Character: Distance metric to use: "euclidean", "cosine",
"manhattan", "hamming", "categorical".}

\item{n_epochs}{Integer: Number of epochs.}

\item{learning_rate}{Float: Learning rate.}

\item{scale}{Logical: If TRUE, scale input data before doing UMAP.}
}
\value{
UMAPConfig object.
}
\description{
Setup UMAP config.
}
\details{
A high \code{n_neighbors} value may give error in some systems:
"Error in irlba::irlba(L, nv = n, nu = 0, maxit = iters) :
function 'as_cholmod_sparse' not provided by package 'Matrix'"
}
\examples{
umap_config <- setup_UMAP(k = 3L)
umap_config
}
\author{
EDG
}


================================================
FILE: man/setup_tSNE.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/11_DecompositionConfig.R
\name{setup_tSNE}
\alias{setup_tSNE}
\title{Setup tSNE config.}
\usage{
setup_tSNE(
  k = 2L,
  initial_dims = 50L,
  perplexity = 30,
  theta = 0.5,
  check_duplicates = TRUE,
  pca = TRUE,
  partial_pca = FALSE,
  max_iter = 1000L,
  verbose = getOption("verbose", FALSE),
  is_distance = FALSE,
  Y_init = NULL,
  pca_center = TRUE,
  pca_scale = FALSE,
  normalize = TRUE,
  stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L),
  mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L),
  momentum = 0.5,
  final_momentum = 0.8,
  eta = 200,
  exaggeration_factor = 12,
  num_threads = 1L
)
}
\arguments{
\item{k}{Integer: Number of components.}

\item{initial_dims}{Integer: Initial dimensions.}

\item{perplexity}{Integer: Perplexity.}

\item{theta}{Float: Theta.}

\item{check_duplicates}{Logical: If TRUE, check for duplicates.}

\item{pca}{Logical: If TRUE, perform PCA.}

\item{partial_pca}{Logical: If TRUE, perform partial PCA.}

\item{max_iter}{Integer: Maximum number of iterations.}

\item{verbose}{Logical: If TRUE, print messages.}

\item{is_distance}{Logical: If TRUE, \code{x} is a distance matrix.}

\item{Y_init}{Matrix: Initial Y matrix.}

\item{pca_center}{Logical: If TRUE, center PCA.}

\item{pca_scale}{Logical: If TRUE, scale PCA.}

\item{normalize}{Logical: If TRUE, normalize.}

\item{stop_lying_iter}{Integer: Stop lying iterations.}

\item{mom_switch_iter}{Integer: Momentum switch iterations.}

\item{momentum}{Float: Momentum.}

\item{final_momentum}{Float: Final momentum.}

\item{eta}{Float: Eta.}

\item{exaggeration_factor}{Float: Exaggeration factor.}

\item{num_threads}{Integer: Number of threads.}
}
\value{
tSNEConfig object.
}
\description{
Setup tSNE config.
}
\details{
Get more information on the config by running \code{?Rtsne::Rtsne}.
}
\examples{
tSNE_config <- setup_tSNE(k = 3L)
tSNE_config
}
\author{
EDG
}


================================================
FILE: man/size.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{size}
\alias{size}
\title{Size of object}
\usage{
size(x, verbosity = 1L)
}
\arguments{
\item{x}{any object with \code{length()} or \code{dim()}.}

\item{verbosity}{Integer: Verbosity level. If > 0, print size to console}
}
\value{
Integer vector with length equal to the number of dimensions of \code{x}, invisibly.
}
\description{
Returns the size of an object
}
\details{
If \code{dim(x)} is NULL, returns \code{length(x)}.
}
\examples{
x <- rnorm(20)
size(x)
# 20
x <- matrix(rnorm(100), 20, 5)
size(x)
# 20  5
}
\author{
EDG
}


================================================
FILE: man/table_column_attr.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_data.R
\name{table_column_attr}
\alias{table_column_attr}
\title{Tabulate column attributes}
\usage{
table_column_attr(x, attr = "source", useNA = "always")
}
\arguments{
\item{x}{tabular data: Input data set.}

\item{attr}{Character: Attribute to get}

\item{useNA}{Character: Passed to \code{table}}
}
\value{
table.
}
\description{
Tabulate column attributes
}
\examples{
library(data.table)
x <- data.table(
  id = 1:5,
  sbp = rnorm(5, 120, 15),
  dbp = rnorm(5, 80, 10),
  paO2 = rnorm(5, 90, 10),
  paCO2 = rnorm(5, 40, 5)
)
setattr(x[["sbp"]], "source", "outpatient")
setattr(x[["dbp"]], "source", "outpatient")
setattr(x[["paO2"]], "source", "icu")
setattr(x[["paCO2"]], "source", "icu")
table_column_attr(x, "source")
}
\author{
EDG
}


================================================
FILE: man/theme.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/theme.R
\name{theme_black}
\alias{theme_black}
\alias{theme_blackgrid}
\alias{theme_blackigrid}
\alias{theme_darkgray}
\alias{theme_darkgraygrid}
\alias{theme_darkgrayigrid}
\alias{theme_white}
\alias{theme_whitegrid}
\alias{theme_whiteigrid}
\alias{theme_lightgraygrid}
\alias{theme_mediumgraygrid}
\title{Themes for \verb{draw_*} functions}
\usage{
theme_black(
  bg = "#000000",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = 0.2,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 0.5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_blackgrid(
  bg = "#000000",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = 0.2,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_blackigrid(
  bg = "#000000",
  plot_bg = "#1A1A1A",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_darkgray(
  bg = "#121212",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = 0.2,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 0.5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_darkgraygrid(
  bg = "#121212",
  plot_bg = "transparent",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#404040",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_darkgrayigrid(
  bg = "#121212",
  plot_bg = "#202020",
  fg = "#ffffff",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "transparent",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_white(
  bg = "#ffffff",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = FALSE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = fg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = fg,
  tick_alpha = 0.5,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_whitegrid(
  bg = "#ffffff",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#c0c0c0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_whiteigrid(
  bg = "#ffffff",
  plot_bg = "#E6E6E6",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = bg,
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "transparent",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_lightgraygrid(
  bg = "#dfdfdf",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#c0c0c0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)

theme_mediumgraygrid(
  bg = "#b3b3b3",
  plot_bg = "transparent",
  fg = "#000000",
  pch = 16,
  cex = 1,
  lwd = 2,
  bty = "n",
  box_col = fg,
  box_alpha = 1,
  box_lty = 1,
  box_lwd = 0.5,
  grid = TRUE,
  grid_nx = NULL,
  grid_ny = NULL,
  grid_col = "#d0d0d0",
  grid_alpha = 1,
  grid_lty = 1,
  grid_lwd = 1,
  axes_visible = TRUE,
  axes_col = "transparent",
  tick_col = "#00000000",
  tick_alpha = 1,
  tick_labels_col = fg,
  tck = -0.01,
  tcl = NA,
  x_axis_side = 1,
  y_axis_side = 2,
  labs_col = fg,
  x_axis_line = 0,
  x_axis_las = 0,
  x_axis_padj = -1.1,
  x_axis_hadj = 0.5,
  y_axis_line = 0,
  y_axis_las = 1,
  y_axis_padj = 0.5,
  y_axis_hadj = 0.5,
  xlab_line = 1.4,
  ylab_line = 2,
  zerolines = TRUE,
  zerolines_col = fg,
  zerolines_alpha = 0.5,
  zerolines_lty = 1,
  zerolines_lwd = 1,
  main_line = 0.25,
  main_adj = 0,
  main_font = 2,
  main_col = fg,
  font_family = getOption("rtemis_font", "Helvetica")
)
}
\arguments{
\item{bg}{Color: Figure background.}

\item{plot_bg}{Color: Plot region background.}

\item{fg}{Color: Foreground color used as default for multiple elements like
axes and labels, which can be defined separately.}

\item{pch}{Integer: Point character.}

\item{cex}{Float: Character expansion factor.}

\item{lwd}{Float: Line width.}

\item{bty}{Character: Box type:  "o", "l", "7", "c", "u", or "]", or "n".}

\item{box_col}{Box color if \code{bty != "n"}.}

\item{box_alpha}{Float: Box alpha.}

\item{box_lty}{Integer: Box line type.}

\item{box_lwd}{Float: Box line width.}

\item{grid}{Logical: If TRUE, draw grid in plot regions.}

\item{grid_nx}{Integer: N of vertical grid lines.}

\item{grid_ny}{Integer: N of horizontal grid lines.}

\item{grid_col}{Grid color.}

\item{grid_alpha}{Float: Grid alpha.}

\item{grid_lty}{Integer: Grid line type.}

\item{grid_lwd}{Float: Grid line width.}

\item{axes_visible}{Logical: If TRUE, draw axes.}

\item{axes_col}{Axes colors.}

\item{tick_col}{Tick color.}

\item{tick_alpha}{Float: Tick alpha.}

\item{tick_labels_col}{Tick labels' color.}

\item{tck}{\code{graphics::parr}'s tck argument: Tick length, can be negative.}

\item{tcl}{\code{graphics::parr}'s tcl argument.}

\item{x_axis_side}{Integer: Side to place x-axis.}

\item{y_axis_side}{Integer: Side to place y-axis.}

\item{labs_col}{Labels' color.}

\item{x_axis_line}{Numeric: \code{graphics::axis}'s \code{line} argument for the x-axis.}

\item{x_axis_las}{Numeric: \code{graphics::axis}'s \code{las} argument for the x-axis.}

\item{x_axis_padj}{Numeric: x-axis' \code{padj}: Adjustment for the x-axis
tick labels' position.}

\item{x_axis_hadj}{Numeric: x-axis' \code{hadj}.}

\item{y_axis_line}{Numeric: \code{graphics::axis}'s \code{line} argument for the y-axis.}

\item{y_axis_las}{Numeric: \code{graphics::axis}'s \code{las} argument for the y-axis.}

\item{y_axis_padj}{Numeric: y-axis' \code{padj}.}

\item{y_axis_hadj}{Numeric: y-axis' \code{hadj}.}

\item{xlab_line}{Numeric: Line to place \code{xlab}.}

\item{ylab_line}{Numeric: Line to place \code{ylab}.}

\item{zerolines}{Logical: If TRUE, draw lines on x = 0, y = 0, if within
plot limits.}

\item{zerolines_col}{Zerolines color.}

\item{zerolines_alpha}{Float: Zerolines alpha.}

\item{zerolines_lty}{Integer: Zerolines line type.}

\item{zerolines_lwd}{Float: Zerolines line width.}

\item{main_line}{Float: How many lines away from the plot region to draw
title.}

\item{main_adj}{Float: How to align title.}

\item{main_font}{Integer: 1: Regular, 2: Bold.}

\item{main_col}{Title color.}

\item{font_family}{Character: Font to be used throughout plot.}
}
\value{
\code{Theme} object.
}
\description{
Themes for \verb{draw_*} functions
}
\examples{
theme <- theme_black(font_family = "Geist")
theme
}


================================================
FILE: man/to_json.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R
\name{to_json}
\alias{to_json}
\title{Convert to JSON-serializable list}
\usage{
to_json(x, ...)
}
\arguments{
\item{x}{rtemis S7 object.}

\item{...}{Additional arguments passed to method.}
}
\value{
Named list. Pass through \code{jsonlite::toJSON(auto_unbox = TRUE)}
for serialization.
}
\description{
Convert an rtemis S7 object to a named list suitable for
\code{jsonlite::toJSON(auto_unbox = TRUE)}. Used by the rtemislive backend
to send structured results to the browser frontend without scraping
R console output.
}
\details{
Each output list includes a \code{.class} field equal to the most specific
S7 class name, allowing the frontend to dispatch to a class-specific
renderer.

The default method walks \code{props(x)}, recursing into S7-typed properties
and passing through primitive properties as-is. Per-class methods
override where the default isn't appropriate (e.g. classes whose props
include a \code{data.table}, an opaque model fit, or where some props should
be excluded for size or relevance reasons).
}
\author{
EDG
}
\keyword{internal}


================================================
FILE: man/train.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/train.R
\name{train}
\alias{train}
\title{Train Supervised Learning Models}
\usage{
train(
  x,
  dat_validation = NULL,
  dat_test = NULL,
  weights = NULL,
  algorithm = NULL,
  preprocessor_config = NULL,
  hyperparameters = NULL,
  tuner_config = NULL,
  outer_resampling_config = NULL,
  execution_config = setup_ExecutionConfig(),
  question = NULL,
  outdir = NULL,
  verbosity = 1L,
  ...
)
}
\arguments{
\item{x}{Tabular data, i.e. data.frame, data.table, or tbl_df (tibble): Training set data.}

\item{dat_validation}{Tabular data: Validation set data.}

\item{dat_test}{Tabular data: Test set data.}

\item{weights}{Optional vector of case weights.}

\item{algorithm}{Character: Algorithm to use. Can be left NULL, if \code{hyperparameters} is defined.}

\item{preprocessor_config}{Optional PreprocessorConfig object: Setup using \link{setup_Preprocessor}.}

\item{hyperparameters}{\code{Hyperparameters} object: Setup using one of \verb{setup_*} functions.}

\item{tuner_config}{TunerConfig object: Setup using \link{setup_GridSearch}.}

\item{outer_resampling_config}{Optional ResamplerConfig object: Setup using \link{setup_Resampler}.
This defines the outer resampling method, i.e. the splitting into training and test sets for the
purpose of assessing model performance. If NULL, no outer resampling is performed, in which case
you might want to use a \code{dat_test} dataset to assess model performance on a single test set.}

\item{execution_config}{\code{ExecutionConfig} object: Setup using \link{setup_ExecutionConfig}. This
allows you to set backend ("future", "mirai", or "none"), number of workers, and future plan if
using \code{backend = "future"}.}

\item{question}{Optional character string defining the question that the model is trying to
answer.}

\item{outdir}{Character, optional: String defining the output directory.}

\item{verbosity}{Integer: Verbosity level.}

\item{...}{Not used.}
}
\value{
Object of class \code{Regression(Supervised)}, \code{RegressionRes(SupervisedRes)},
\code{Classification(Supervised)}, or \code{ClassificationRes(SupervisedRes)}.
}
\description{
Preprocess, tune, train, and test supervised learning models using nested resampling in a single
call.
}
\details{
\strong{Online book & documentation}

See \href{https://docs.rtemis.org/r/}{docs.rtemis.org/r} for detailed documentation.

\strong{Preprocessing}

There are many different stages at which preprocessing could be applied, when running a
supervised learning pipeline with nested resampling. Some operations are best done before
passing data to \code{train()}:
\itemize{
\item Duplicate rows should be removed before resampling, so that duplicates don't end up in
different resamples, e.g. one in training and one in test.
\item Constant columns should be removed before resampling. A column may appear constant in a small
resample, even if it is not constant in the full dataset. Removing it inconsistently will
throw an error during prediction.
\item All data-dependent preprocessing steps need to be performed on training data only and applied
on validation and test data, e.g. scaling, centering, imputation.
}

User-defined preprocessing through \code{preprocessor_config} is applied on training set data,
the learned parameters are stored in the returned Supervised or SupervisedRes object, and the
preprocessing is applied on validation and test data.

\strong{Binary Classification}

For binary classification, the outcome should be a factor where \emph{the 2nd level
corresponds to the positive class}.

\strong{Resampling}

Note that you should not use an outer resampling method with
replacement if you will also be using an inner resampling (for tuning).
The duplicated cases from the outer resampling may appear both in the
training and test sets of the inner resamples, leading to underestimated
test error.

\strong{Reproducibility}

If using \emph{\strong{outer resampling}}, you can set a seed when defining \code{outer_resampling_config}, e.g.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{outer_resampling_config = setup_Resampler(n_resamples = 10L, type = "KFold", seed = 2026L)
}\if{html}{\out{</div>}}

If using \emph{\strong{tuning with inner resampling}}, you can set a seed when defining \code{tuner_config},
e.g.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{tuner_config = setup_GridSearch(
  resampler_config = setup_Resampler(n_resamples = 5L, type = "KFold", seed = 2027L)
)
}\if{html}{\out{</div>}}

\strong{Parallelization}

There are three levels of parallelization that may be used during training:
\enumerate{
\item Algorithm training (e.g. a parallelized learner like LightGBM)
\item Tuning (inner resampling, where multiple resamples can be processed in parallel)
\item Outer resampling (where multiple outer resamples can be processed in parallel)
}

The \code{train()} function will automatically manage parallelization depending
on:
\itemize{
\item The number of workers specified by the user using \code{n_workers}
\item Whether the training algorithm supports parallelization itself
\item Whether hyperparameter tuning is needed
}
}
\examples{
\donttest{
iris_c_lightRF <- train(
   iris,
   algorithm = "LightRF",
   outer_resampling_config = setup_Resampler(),
)
}
}
\author{
EDG
}


================================================
FILE: man/uniprot_get.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_uniprot.R
\name{uniprot_get}
\alias{uniprot_get}
\title{Get protein sequence from UniProt}
\usage{
uniprot_get(
  accession,
  baseURL = "https://rest.uniprot.org/uniprotkb",
  verbosity = 1
)
}
\arguments{
\item{accession}{Character: UniProt Accession number - e.g. "Q9UMX9"}

\item{baseURL}{Character: UniProt rest API base URL.
Default = "https://rest.uniprot.org/uniprotkb"}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
List with three elements: Identifier, Annotation, and Sequence.
}
\description{
Get protein sequence from UniProt
}
\examples{
\dontrun{
# This gets the sequence from uniprot.org by default
mapt <- uniprot_get("Q9UMX9")
}
}
\author{
E.D. Gennatas
}


================================================
FILE: man/with_msg_sink.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/msg.R
\name{with_msg_sink}
\alias{with_msg_sink}
\title{Run code with a temporary message sink}
\usage{
with_msg_sink(sink, code)
}
\arguments{
\item{sink}{Sink function or \code{NULL}.}

\item{code}{Code to run.}
}
\value{
The value returned by \code{code}.
}
\description{
Sets \code{sink} for the duration of \code{code}, restoring the previous sink on exit
(including on error). Useful in tests and for short-lived capture.
}
\examples{
captured <- list()
with_msg_sink(
  function(m) captured[[length(captured) + 1L]] <<- m,
  {
    # any msg() / msg0() / msgstart() / msgdone() calls in here are captured
  }
)
}
\seealso{
\code{\link[=set_msg_sink]{set_msg_sink()}}, \code{\link[=get_msg_sink]{get_msg_sink()}}.
}
\author{
EDG
}


================================================
FILE: man/write_toml.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/00_S7init.R, R/14_SuperConfig.R
\name{write_toml}
\alias{write_toml}
\title{Write to TOML file}
\usage{
write_toml(x, file, overwrite = FALSE, verbosity = 1L)

## S7 method for class <rtemis::SuperConfig>
write_toml(x, file, overwrite = FALSE, verbosity = 1L)
}
\arguments{
\item{x}{\code{SuperConfig} object.}

\item{file}{Character: Path to output TOML file.}

\item{overwrite}{Logical: If TRUE, overwrite existing file.}

\item{verbosity}{Integer: Verbosity level.}
}
\value{
\code{SuperConfig} object, invisibly.
}
\description{
Write to TOML file
}
\examples{
x <- setup_SuperConfig(
  dat_training_path = "~/Data/iris.csv",
  dat_validation_path = NULL,
  dat_test_path = NULL,
  weights = NULL,
  preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
  algorithm = "LightRF",
  hyperparameters = setup_LightRF(),
  tuner_config = setup_GridSearch(),
  outer_resampling_config = setup_Resampler(),
  execution_config = setup_ExecutionConfig(),
  question = "Can we tell iris species apart given their measurements?",
  outdir = "models/",
  verbosity = 1L
)
tmpdir <- tempdir()
write_toml(x, file.path(tmpdir, "rtemis.toml"))
}
\author{
EDG
}


================================================
FILE: man/xt_example.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_xt_example.R
\docType{data}
\name{xt_example}
\alias{xt_example}
\title{Example longitudinal dataset}
\format{
A data frame with 30 rows and 4 variables:
\describe{
\item{patient_id}{Integer: Patient identifier (1-10).}
\item{year}{Integer: Year of measurement (2020-2024).}
\item{blood_pressure}{Numeric: Systolic blood pressure measurement.}
\item{treatment}{Character: Treatment group ("A" or "B").}
}
}
\usage{
xt_example
}
\description{
A small synthetic dataset demonstrating various participation patterns
in longitudinal data, suitable for examples with \code{\link{xtdescribe}}.
}
\details{
This dataset includes 10 patients measured at up to 5 time points (years 2020-2024).
The dataset demonstrates various participation patterns typical in longitudinal studies:
\itemize{
\item Complete participation (all time points)
\item Early dropout
\item Late entry
\item Intermittent participation
\item Single time point participation
}
}
\examples{
data(xt_example)
head(xt_example)
summary(xt_example)

}
\keyword{datasets}


================================================
FILE: man/xtdescribe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils_xt.R
\name{xtdescribe}
\alias{xtdescribe}
\title{Describe longitudinal dataset}
\usage{
xtdescribe(x, id_col = 1, time_col = 2, n_patterns = 9)
}
\arguments{
\item{x}{data.frame: Longitudinal data with ID and time variables.}

\item{id_col}{Integer: The column position of the ID variable.}

\item{time_col}{Integer: The column position of the time variable.}

\item{n_patterns}{Integer: The number of patterns to display.}
}
\value{
data.frame: Summary of participation patterns, returned invisibly.
}
\description{
This function emulates the \code{xtdescribe} function in Stata.
}
\examples{
# Load example longitudinal dataset
data(xt_example)

# Describe the longitudinal structure
xtdescribe(xt_example)
}
\author{
EDG
}


================================================
FILE: tests/testthat/test_Calibration.R
================================================
# test_Calibration.R
# ::rtemis::
# EDG rtemis.org

# Key
# {Algorithm}[method]<Class> Further conditions

# Setup ----
# library(rtemis)
# library(testthat)
library(data.table)

# Data ----
## Regression Data ----
n <- 400
x <- rnormmat(n, 5, seed = 2025)
g <- factor(sample(c("A", "B"), n, replace = TRUE))
y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n)
datr <- data.table(x, g, y)
resr <- resample(datr)
datr_train <- datr[resr$Fold_1, ]
datr_test <- datr[-resr$Fold_1, ]

## Classification Data ----
### Binary ----
datc2 <- data.frame(
  gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)),
  iris[51:150, ]
)
datc2$Species <- factor(datc2$Species)
resc2 <- resample(datc2)
datc2_train <- datc2[resc2$Fold_1, ]
datc2_test <- datc2[-resc2$Fold_1, ]

### 3-class ----
datc3 <- iris
resc3 <- resample(datc3)
datc3_train <- datc3[resc3$Fold_1, ]
datc3_test <- datc3[-resc3$Fold_1, ]


================================================
FILE: tests/testthat/test_CheckData.R
================================================
# test_CheckData.R
# ::rtemis::
# 2025- EDG rtemis.org

# %% check_data() ----
test_that("check_data() succeeds", {
  x <- data.frame(
    a = c(1, 2, 3, NA),
    b = c("A", "B", "C", "D"),
    c = c(1.5, NA, 3.5, NA)
  )
  x_cd <- check_data(x, get_na_case_pct = TRUE, get_na_feature_pct = TRUE)
  expect_s7_class(x_cd, CheckData)
  expect_equal(x_cd$n_na, 3)
  expect_equal(x_cd$n_cols_anyna, 2)
  expect_equal(nrow(x_cd$na_feature_pct), 2)
  expect_equal(x_cd$na_feature_pct$Feature, c("a", "c"))
  expect_equal(x_cd$na_feature_pct$Pct_NA, c(0.25, 0.5))
  expect_equal(nrow(x_cd$na_case_pct), 2)
  expect_equal(x_cd$na_case_pct$Case, c(2, 4))
  expect_equal(x_cd$na_case_pct$Pct_NA, c(1 / 3, 2 / 3))
})


================================================
FILE: tests/testthat/test_Clustering.R
================================================
# test_Clustering.R
# ::rtemis::
# 2025 EDG rtemis.org

# Data ----
x <- iris[, -5]

# setup_KMeans ----
test_that("setup_KMeans() succeeds", {
  expect_s7_class(setup_KMeans(), KMeansConfig)
})

# setup_KMeans throws error ----
test_that("setup_KMeans() throws error with bad values or wrong types", {
  expect_error(setup_KMeans(k = -1L))
  expect_error(setup_KMeans(dist = "foo"))
})

# cluster KMeans ----
test_that("cluster_KMeans() succeeds", {
  iris_kmeans <- cluster(
    x,
    algorithm = "kmeans",
    config = setup_KMeans(k = 3L)
  )
  expect_s7_class(iris_kmeans, Clustering)
})

# cluster KMeans with k = 10 ----
test_that("cluster_KMeans() with k = 10 succeeds", {
  skip_if_not_installed("flexclust")
  iris_kmeans10 <- cluster(
    x,
    algorithm = "kmeans",
    config = setup_KMeans(k = 10L)
  )
  expect_s7_class(iris_kmeans10, Clustering)
})

# setup_HardCL ----
test_that("setup_HardCL() succeeds", {
  expect_s7_class(setup_HardCL(), HardCLConfig)
})

# cluster HardCL ----
test_that("cluster_HardCL() succeeds", {
  skip_if_not_installed("flexclust")
  iris_hardcl <- cluster(
    x,
    algorithm = "HardCL",
    config = setup_HardCL(k = 3L)
  )
  expect_s7_class(iris_hardcl, Clustering)
})

# setup_NeuralGas ----
test_that("setup_NeuralGas() succeeds", {
  expect_s7_class(setup_NeuralGas(), NeuralGasConfig)
})

# cluster NeuralGas ----
test_that("cluster_NeuralGas() succeeds", {
  skip_if_not_installed("flexclust")
  iris_neuralgas <- cluster(
    x,
    algorithm = "NeuralGas",
    config = setup_NeuralGas(k = 3L)
  )
  expect_s7_class(iris_neuralgas, Clustering)
})

# setup_CMeans ----
test_that("setup_CMeans() succeeds", {
  expect_s7_class(setup_CMeans(), CMeansConfig)
})

# cluster CMeans ----
test_that("cluster_CMeans() succeeds", {
  skip_if_not_installed("e1071")
  iris_cmeans <- cluster(
    x,
    algorithm = "CMeans",
    config = setup_CMeans(k = 3L)
  )
  expect_s7_class(iris_cmeans, Clustering)
})

# setup_DBSCAN ----
test_that("setup_DBSCAN() succeeds", {
  expect_s7_class(setup_DBSCAN(), DBSCANConfig)
})

# cluster DBSCAN ----
test_that("cluster_DBSCAN() succeeds", {
  skip_if_not_installed("dbscan")
  iris_dbscan <- cluster(
    x,
    algorithm = "DBSCAN",
    config = setup_DBSCAN(eps = 0.5, min_points = 5L)
  )
  expect_s7_class(iris_dbscan, Clustering)
})


================================================
FILE: tests/testthat/test_Decomposition.R
================================================
# test_Decomposition.R
# ::rtemis::
# 2025 EDG rtemis.org

# Data ----
x <- iris[, -5]

# PCA ----
test_that("setup_PCA() succeeds", {
  config <- setup_PCA()
  expect_s7_class(config, PCAConfig)
})

test_that("decomp() PCA succeeds", {
  iris_pca <- decomp(x, algorithm = "pca", config = setup_PCA())
  iris_pca
  expect_s7_class(iris_pca, Decomposition)
})

# ICA ----
test_that("setup_ICA() succeeds", {
  config <- setup_ICA()
  expect_s7_class(config, ICAConfig)
})

test_that("decomp() ICA succeeds", {
  skip_if_not_installed("fastICA")
  iris_ica <- decomp(x, algorithm = "ica", config = setup_ICA())
  expect_s7_class(iris_ica, Decomposition)
})

# NMF ----
test_that("setup_NMF() succeeds", {
  config <- setup_NMF()
  expect_s7_class(config, NMFConfig)
})

test_that("decomp() NMF succeeds", {
  skip_if_not_installed("NMF")
  iris_nmf <- decomp(x, algorithm = "nmf", config = setup_NMF())
  expect_s7_class(iris_nmf, Decomposition)
})

# UMAP ----
test_that("setup_UMAP() succeeds", {
  config <- setup_UMAP()
  expect_s7_class(config, UMAPConfig)
})

test_that("decomp() UMAP succeeds", {
  skip_if_not_installed("uwot")
  iris_umap <- decomp(x, algorithm = "umap", config = setup_UMAP())
  iris_umap <- decomp(
    x,
    algorithm = "umap",
    config = setup_UMAP(n_neighbors = 20L)
  )
  expect_s7_class(iris_umap, Decomposition)
})

# t-SNE ----
test_that("setup_tSNE() succeeds", {
  config <- setup_tSNE()
  expect_s7_class(config, tSNEConfig)
})

# Test that t-SNE fails with duplicates
test_that("decomp() t-SNE fails with duplicates", {
  skip_if_not_installed("Rtsne")
  expect_error(decomp(x, algorithm = "tsne"))
})

# Test that t-SNE works after removing duplicates
test_that("decomp() t-SNE succeeds after removing duplicates", {
  skip_if_not_installed("Rtsne")
  xp <- preprocess(x, setup_Preprocessor(remove_duplicates = TRUE))
  iris_tsne <- decomp(
    xp@preprocessed,
    algorithm = "tsne",
    config = setup_tSNE()
  )
  expect_s7_class(iris_tsne, Decomposition)
})

# Isomap ----
test_that("setup_Isomap() succeeds", {
  config <- setup_Isomap()
  expect_s7_class(config, IsomapConfig)
})

test_that("decomp() Isomap succeeds", {
  skip_if_not_installed("vegan")
  iris_isomap <- decomp(x, algorithm = "isomap", config = setup_Isomap())
  expect_s7_class(iris_isomap, Decomposition)
})


================================================
FILE: tests/testthat/test_ExecutionConfig.R
================================================
# test_ExecutionConfig.R
# ::rtemis::
# 2026- EDG rtemis.org

# library(testthat)

# %% ExecutionConfig ----
ec <- ExecutionConfig(
  backend = "future",
  n_workers = 4L,
  future_plan = "multisession"
)
ec
testthat::test_that("ExecutionConfig() works", {
  expect_s7_class(
    ec,
    ExecutionConfig
  )
})

# %% setup_ExecutionConfig() ----
ec <- setup_ExecutionConfig(
  backend = "future",
  n_workers = 4L,
  future_plan = "multisession"
)
testthat::test_that("setup_ExecutionConfig() works", {
  expect_s7_class(
    ec,
    ExecutionConfig
  )
})


================================================
FILE: tests/testthat/test_Hyperparameters.R
================================================
# test_Hyperparameters.R
# ::rtemis::
# 2025 EDG rtemis.org

# Hyperparameters ----
hpr <- Hyperparameters(
  algorithm = "Custom",
  hyperparameters = list(alpha = c(0, 1), beta = 2),
  tunable_hyperparameters = "alpha",
  fixed_hyperparameters = "beta"
)
test_that("Hyperparameters succeeds", {
  expect_s7_class(hpr, Hyperparameters)
  # test that tuned is set correctly
  expect_identical(hpr@tuned, 0L)
})

# CARTHyperparameters ----
test_that("CARTHyperparameters() errors", {
  expect_error(CARTHyperparameters())
})

# get_hyperparams_need_tuning ----
test_that("get_hyperparams_need_tuning() succeeds", {
  expect_type(get_hyperparams_need_tuning(hpr), "list")
})

# Check printing of hp that need tuning ----
# CARTHyperparameters ----
# setup_CART ----
cart_hpr <- setup_CART(
  prune_cp = c(.001, .01, .1),
  minsplit = c(2L, 10L),
  minbucket = c(1L, 10L)
)
test_that("setup_CART() succeeds", {
  expect_s7_class(cart_hpr, CARTHyperparameters)
})

# needs_tuning ----
test_that("needs_tuning() succeeds", {
  expect_type(needs_tuning(cart_hpr), "logical")
})

# GLMNETHyperparameters ----
test_that("GLMNETHyperparameters() errors", {
  expect_error(GLMNETHyperparameters())
})

# setup_GLMNET ----
test_that("setup_GLMNET() succeeds", {
  expect_s7_class(setup_GLMNET(), GLMNETHyperparameters)
})

# LightCARTHyperparameters ----
test_that("LightCARTHyperparameters() errors", {
  expect_error(LightCARTHyperparameters())
})

# setup_LightCART ----
test_that("setup_LightCART() succeeds", {
  expect_s7_class(setup_LightCART(), LightCARTHyperparameters)
})

# LightRFHyperparameters ----
test_that("LightRFHyperparameters() errors", {
  expect_error(LightRFHyperparameters())
})

# setup_LightRF ----
test_that("setup_LightRF() succeeds", {
  lrf_hpr <- setup_LightRF()
  lrf_hpr
  expect_s7_class(lrf_hpr, LightRFHyperparameters)
})

# LightGBMHyperparameters ----
test_that("LightGBMHyperparameters() errors", {
  expect_error(LightGBMHyperparameters())
})

# setup_LightGBM ----
test_that("setup_LightGBM() succeeds", {
  lgbm_hpr <- setup_LightGBM(
    num_leaves = c(4, 8, 16),
    learning_rate = c(.001, .01, .1)
  )
  expect_s7_class(setup_LightGBM(), LightGBMHyperparameters)
})

# LightRuleFitHyperparameters ----
test_that("LightRuleFitHyperparameters() errors", {
  expect_error(LightRuleFitHyperparameters())
})

# setup_LightRuleFit ----
test_that("setup_LightRuleFit() succeeds", {
  expect_s7_class(setup_LightRuleFit(), LightRuleFitHyperparameters)
})

# IsotonicHyperparameters ----
test_that("IsotonicHyperparameters() errors", {
  expect_error(IsotonicHyperparameters())
})

# setup_Isotonic ----
test_that("setup_Isotonic() succeeds", {
  expect_s7_class(setup_Isotonic(), IsotonicHyperparameters)
})

# RadialSVMHyperparameters ----
test_that("RadialSVMHyperparameters() errors", {
  expect_error(RadialSVMHyperparameters())
})

# setup_LinearSVM ----
test_that("setup_LinearSVM() succeeds", {
  expect_s7_class(setup_LinearSVM(), LinearSVMHyperparameters)
})

# setup_RadialSVM ----
test_that("setup_RadialSVM() succeeds", {
  expect_s7_class(setup_RadialSVM(), RadialSVMHyperparameters)
})

# TabNetHyperparameters ----
test_that("TabNetHyperparameters() errors", {
  expect_error(TabNetHyperparameters())
})

# setup_TabNet ----
test_that("setup_TabNet() succeeds", {
  expect_s7_class(setup_TabNet(), TabNetHyperparameters)
})

# setup_Ranger ----
test_that("setup_Ranger() succeeds", {
  expect_s7_class(setup_Ranger(), RangerHyperparameters)
})


================================================
FILE: tests/testthat/test_Metrics.R
================================================
# test_Metrics.R
# ::rtemis::
# 2025 EDG rtemis.org

# Regression Data ----
set.seed(2025)
true <- rnorm(500)
predicted <- true + rnorm(500) / 2
predicted2 <- true + rnorm(500) / 2

# RegressionMetrics ----
reg_metrics <- regression_metrics(true, predicted, sample = "Training")
reg_metrics
test_that("regression_metrics() succeeds", {
  expect_s7_class(regression_metrics(true, predicted), RegressionMetrics)
})
reg_metrics2 <- regression_metrics(true, predicted2, sample = "Test")

# Classification Data ----
true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
predicted_labels <- factor(c("a", "b", "a", "b", "b", "a", "b", "b", "b", "a"))
predicted_prob <- c(0.3, 0.6, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)
predicted_prob2 <- c(0.2, 0.52, 0.28, 0.85, 0.64, 0.45, 0.9, 0.78, 0.78, 0.47)

# ClassificationMetrics ----
class_metrics1 <- classification_metrics(
  true_labels,
  predicted_labels,
  predicted_prob,
  sample = "Training"
)
class_metrics2 <- classification_metrics(
  true_labels,
  predicted_labels,
  predicted_prob2,
  sample = "Test"
)

test_that("classification_metrics() succeeds", {
  expect_s7_class(class_metrics1, ClassificationMetrics)
  expect_s7_class(class_metrics2, ClassificationMetrics)
})

# Test that class_metrics2 has higher AUC and lower Brier score than class_metrics1
test_that("classification_metrics() returns correct metrics", {
  expect_true(
    class_metrics2@metrics[["Overall"]][["AUC"]] >
      class_metrics1@metrics[["Overall"]][["AUC"]]
  )
  expect_true(
    class_metrics2@metrics[["Overall"]][["Brier_Score"]] <
      class_metrics1@metrics[["Overall"]][["Brier_Score"]]
  )
})

# RegressionMetricsRes ----
res_metrics <- list(mod1 = reg_metrics, mod2 = reg_metrics2)
rmcv <- RegressionMetricsRes(
  sample = "Test",
  res_metrics = res_metrics
)
rmcv
test_that("RegressionMetricsRes() succeeds", {
  expect_s7_class(rmcv, RegressionMetricsRes)
})

# ClassificationMetricsRes ----
res_metrics <- list(mod1 = class_metrics1, mod2 = class_metrics2)
cmcv <- ClassificationMetricsRes(
  sample = "Test",
  res_metrics = res_metrics
)
cmcv

test_that("ClassificationMetricsRes() succeeds", {
  expect_s7_class(cmcv, ClassificationMetricsRes)
})


================================================
FILE: tests/testthat/test_Preprocessor.R
================================================
# test_Preprocessor.R
# ::rtemis::
# 2025 EDG rtemis.org

# library(testthat)

# PreprocessorConfig ----
prp <- setup_Preprocessor()
prp
testthat::test_that("setup_Preprocessor() succeeds", {
  expect_s7_class(setup_Preprocessor(), PreprocessorConfig)
})

prp <- setup_Preprocessor(
  remove_constants = TRUE,
  remove_duplicates = TRUE
)
testthat::test_that("setup_Preprocessor() succeeds", {
  expect_s7_class(prp, PreprocessorConfig)
})

# Preprocessor: preprocess(PreprocessorConfig) ----
res <- resample(iris, setup_Resampler(seed = 2025))
iris_train <- iris[res$Fold_1, ]
iris_test <- iris[-res$Fold_1, ]
iris_Pre <- preprocess(
  iris_train,
  setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE)
)
test_that("preprocess(x, PreprocessorConfig) succeeds", {
  expect_s7_class(iris_Pre, Preprocessor)
})
iris_Pre
iris_Pre@preprocessed
iris_Pre@values

iris_test_Pre <- preprocess(iris_test, iris_Pre)
test_that("preprocess(x, Preprocessor) succeeds", {
  expect_s7_class(iris_test_Pre, Preprocessor)
})

iris_Pre_too <- preprocess(
  iris_train,
  setup_Preprocessor(remove_duplicates = TRUE, scale = TRUE, center = TRUE),
  dat_test = iris_test
)
test_that("preprocess(x, PreprocessorConfig) succeeds", {
  expect_s7_class(iris_Pre_too, Preprocessor)
})

test_that("preprocess(x, PreprocessorConfig) and preprocess(x, Preprocessor) give same test set", {
  expect_equal(iris_Pre_too@preprocessed$test, iris_test_Pre@preprocessed)
})

# impute meanMode ----
x <- iris
# Continuous
x[10:15, 1] <- NA
# Categorical
x[20:25, 5] <- NA
xp <- preprocess(
  x,
  setup_Preprocessor(impute = TRUE, impute_type = "meanMode")
)[["preprocessed"]]

test_that("impute meanMode works", {
  expect_false(anyNA(xp))
})

# Test one_hot ----
n <- 10
x <- rnormmat(n, 5, seed = 2025)
g <- factor(sample(c("A", "B"), n, replace = TRUE))
y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n)
datr <- data.frame(x, g, y)
datr_onehot <- preprocess(
  datr,
  setup_Preprocessor(one_hot = TRUE)
)[["preprocessed"]]
test_that("one_hot.data.frame works", {
  expect_s3_class(datr_onehot, "data.frame")
})


================================================
FILE: tests/testthat/test_Resampler.R
================================================
# test_Resampler.R
# ::rtemis::
# EDG rtemis.org

# library(testthat)

# StratSubConfig ----
test_that("StratSubConfig succeeds", {
  rsp <- StratSubConfig(
    n = 10L,
    stratify_var = NULL,
    train_p = .75,
    strat_n_bins = 4L,
    id_strat = NULL,
    seed = NULL
  )
  expect_s7_class(rsp, StratSubConfig)
})

# KFoldConfig ----
test_that("KFoldConfig succeeds", {
  rsp <- KFoldConfig(
    n = 10L,
    stratify_var = NULL,
    strat_n_bins = 4L,
    id_strat = NULL,
    seed = NULL
  )
  expect_s7_class(rsp, KFoldConfig)
})

# BootstrapConfig ----
test_that("BootstrapConfig succeeds", {
  rsp <- BootstrapConfig(
    n = 10L,
    id_strat = NULL,
    seed = NULL
  )
  expect_s7_class(rsp, BootstrapConfig)
})

# StratBootConfig ----
test_that("StratBootConfig succeeds", {
  rsp <- StratBootConfig(
    n = 10L,
    stratify_var = NULL,
    train_p = .75,
    strat_n_bins = 4L,
    target_length = NULL,
    id_strat = NULL,
    seed = NULL
  )
  expect_s7_class(rsp, StratBootConfig)
})

# LOOCVConfig ----
test_that("LOOCVConfig succeeds", {
  rsp <- LOOCVConfig(
    n = 10L
  )
  expect_s7_class(rsp, LOOCVConfig)
})

# CustomConfig ----
test_that("CustomConfig succeeds", {
  rsp <- CustomConfig(
    n = 10L
  )
  expect_s7_class(rsp, CustomConfig)
})

# setup_Resampler() defaults ----
test_that("setup_Resampler() succeeds", {
  rsp <- setup_Resampler()
  expect_s7_class(rsp, ResamplerConfig)
})

# setup_Resampler() kfold ----
test_that("setup_Resampler() kfold succeeds", {
  rsp <- setup_Resampler(type = "KFold")
  expect_s7_class(rsp, KFoldConfig)
})

# setup_Resampler() strat_sub ----
test_that("setup_Resampler() strat_sub succeeds", {
  rsp <- setup_Resampler(type = "StratSub")
  expect_s7_class(rsp, StratSubConfig)
})

# setup_Resampler() strat_boot ----
test_that("setup_Resampler() strat_boot succeeds", {
  rsp <- setup_Resampler(type = "StratBoot")
  expect_s7_class(rsp, StratBootConfig)
})

test_that("setup_Resampler() strat_boot fails with invalid train_p", {
  expect_error(
    setup_Resampler(type = "StratBoot", train_p = 1)
  )
})

# setup_Resampler() bootstrap ----
test_that("setup_Resampler() bootstrap succeeds", {
  rsp <- setup_Resampler(type = "Bootstrap")
  expect_s7_class(rsp, BootstrapConfig)
})

# setup_Resampler() loocv ----
test_that("setup_Resampler() loocv succeeds", {
  rsp <- setup_Resampler(type = "LOOCV")
  expect_s7_class(rsp, LOOCVConfig)
})

# Resampler ----
test_that("Resampler() succeeds", {
  res <- Resampler(
    type = "Custom",
    resamples = list(),
    config = setup_Resampler()
  )
  expect_s7_class(res, Resampler)
})

# resample() vector ----
## KFold ----
test_that("resample() vector succeeds", {
  res <- resample(iris[[1]], setup_Resampler(type = "KFold"))
  expect_s7_class(res, Resampler)
})

## StratSub ----
test_that("resample() vector succeeds with StratSub", {
  res <- resample(iris[[1]], setup_Resampler(type = "StratSub"))
  expect_s7_class(res, Resampler)
})

## StratBoot ----
test_that("resample() vector succeeds with StratBoot", {
  res <- resample(iris[[1]], setup_Resampler(type = "StratBoot"))
  expect_s7_class(res, Resampler)
})

## Bootstrap ----
test_that("resample() vector succeeds with Bootstrap", {
  res <- resample(iris[[1]], setup_Resampler(type = "Bootstrap"))
  expect_s7_class(res, Resampler)
})

## LOOCV ----
test_that("resample() vector succeeds with LOOCV", {
  res <- resample(iris[[1]], setup_Resampler(type = "LOOCV"))
  expect_s7_class(res, Resampler)
})

# resample() data.frame ----
test_that("resample() data.frame succeeds", {
  res <- resample(iris, setup_Resampler())
  expect_s7_class(res, Resampler)
})

# resample() data.table ----
test_that("resample() data.table succeeds", {
  res <- resample(as.data.table(iris), setup_Resampler())
  expect_s7_class(res, Resampler)
})


================================================
FILE: tests/testthat/test_SuperConfig.R
================================================
# test_SupervisedConfig.R
# ::rtemis::
# 2026- EDG rtemis.org

# %% SuperConfig ----
test_that("SuperConfig() succeeds", {
  sc <- SuperConfig(
    dat_training_path = "train.csv",
    dat_validation_path = "validation.csv",
    dat_test_path = "test.csv",
    weights = NULL,
    algorithm = "GLMNET",
    preprocessor_config = setup_Preprocessor(),
    hyperparameters = setup_GLMNET(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we predict the future from the past?",
    outdir = "results/",
    verbosity = 1L
  )
  expect_s7_class(sc, SuperConfig)
})

# %% setup_SuperConfig() ----
test_that("setup_SuperConfig() succeeds", {
  sc <- setup_SuperConfig(
    dat_training_path = "train.csv",
    dat_validation_path = "validation.csv",
    dat_test_path = "test.csv",
    weights = NULL,
    preprocessor_config = setup_Preprocessor(),
    algorithm = "LightGBM",
    hyperparameters = setup_LightGBM(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we predict the future from the past?",
    outdir = "models/",
    verbosity = 1L
  )
  expect_s7_class(sc, SuperConfig)
})


# %% train SuperConfig ----
test_that("train() works with SuperConfig", {
  testthat::skip("For local testing only; requires CSV file")
  x <- setup_SuperConfig(
    dat_training_path = "~/Data/iris.csv",
    dat_validation_path = NULL,
    dat_test_path = NULL,
    weights = NULL,
    preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
    algorithm = "LightRF",
    hyperparameters = setup_LightRF(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we tell iris species apart given their measurements?",
    outdir = "models/",
    verbosity = 1L
  )
  mod <- train(x)
  expect_s7_class(mod, SupervisedRes)
})


# %% Test to_toml.SuperConfig ----
test_that("SuperConfig can be written to TOML", {
  x <- setup_SuperConfig(
    dat_training_path = "~/Data/iris.csv",
    dat_validation_path = NULL,
    dat_test_path = NULL,
    weights = NULL,
    preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
    algorithm = "LightRF",
    hyperparameters = setup_LightRF(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we tell iris species apart given their measurements?",
    outdir = "models/",
    verbosity = 1L
  )
  toml_str <- to_toml(x)
  expect_type(toml_str, "character")
})


# %% write_toml.SuperConfig & read_config ----
test_that("SuperConfig can be written to TOML", {
  x <- setup_SuperConfig(
    dat_training_path = "~/Data/iris.csv",
    dat_validation_path = NULL,
    dat_test_path = NULL,
    weights = NULL,
    preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
    algorithm = "LightRF",
    hyperparameters = setup_LightRF(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we tell iris species apart given their measurements?",
    outdir = "models/",
    verbosity = 1L
  )
  tmpdir <- tempdir()
  write_toml(x, file.path(tmpdir, "rtemis.toml"), overwrite = TRUE)
  testthat::expect_true(file.exists(file.path(tmpdir, "rtemis.toml")))
  xtoo <- read_config(file.path(tmpdir, "rtemis.toml"))
  expect_s7_class(xtoo, SuperConfig)
})


# %% Test to_yaml.SuperConfig ----
test_that("SuperConfig can be written to YAML", {
  x <- setup_SuperConfig(
    dat_training_path = "~/Data/iris.csv",
    dat_validation_path = NULL,
    dat_test_path = NULL,
    weights = NULL,
    preprocessor_config = setup_Preprocessor(remove_duplicates = TRUE),
    algorithm = "LightRF",
    hyperparameters = setup_LightRF(),
    tuner_config = setup_GridSearch(),
    outer_resampling_config = setup_Resampler(),
    execution_config = setup_ExecutionConfig(),
    question = "Can we tell iris species apart given their measurements?",
    outdir = "models/",
    verbosity = 1L
  )
  yaml_str <- to_yaml(x)
  expect_type(yaml_str, "character")
})


================================================
FILE: tests/testthat/test_SuperConfigLive.R
================================================
# test_SuperConfigLive.R
# ::rtemis::
# 2026- EDG rtemis.org
#
# Tests for `SuperConfigLive` and its `train()` dispatch arm. The wire-
# level integration (rtemislive train handler → SuperConfigLive) is
# covered in test_rtemislive_dispatch_data_jobs.R / test_rtemislive_serial.R.

library(data.table)

# Helpers --------------------------------------------------------------------

small_regression_dt <- function(seed = 2030L, n = 60L) {
  set.seed(seed)
  dt <- data.table(
    a = rnorm(n),
    b = rnorm(n),
    c = rnorm(n),
    y = NA_real_
  )
  dt[, y := a + 0.5 * b + rnorm(n)]
  dt
}


# Constructor / props -------------------------------------------------------

test_that("setup_SuperConfigLive returns a SuperConfigLive with expected props", {
  dt <- small_regression_dt()
  cfg <- setup_SuperConfigLive(
    dat_training = dt,
    algorithm = "glm"
  )
  expect_true(S7_inherits(cfg, SuperConfigLive))
  expect_identical(cfg@dat_training, dt)
  expect_null(cfg@dat_validation)
  expect_null(cfg@dat_test)
  expect_equal(cfg@algorithm, "glm")
  expect_null(cfg@outdir)
})

test_that("setup_SuperConfigLive enforces tabular type on dat_training", {
  expect_error(
    setup_SuperConfigLive(dat_training = "not a data frame"),
    regexp = "(class_tabular|tabular|data.frame|data.table)"
  )
})

test_that("setup_SuperConfigLive accepts a data.frame (not just data.table)", {
  df <- data.frame(x = 1:5, y = rnorm(5))
  cfg <- setup_SuperConfigLive(dat_training = df, algorithm = "glm")
  expect_s3_class(cfg@dat_training, "data.frame")
})


# train() SuperConfigLive dispatch ------------------------------------------

test_that("train(SuperConfigLive) runs end-to-end for a simple GLM regression", {
  dt <- small_regression_dt(seed = 2031L)
  cfg <- setup_SuperConfigLive(
    dat_training = dt,
    algorithm = "glm",
    verbosity = 0L
  )
  mod <- train(cfg)
  expect_true(S7_inherits(mod, Supervised))
  expect_equal(mod@algorithm, "GLM")
  expect_length(mod@predicted_training, nrow(dt))
})

test_that("train(SuperConfigLive) accepts an in-memory validation split", {
  dt <- small_regression_dt(seed = 2032L)
  val <- small_regression_dt(seed = 2033L, n = 20L)
  cfg <- setup_SuperConfigLive(
    dat_training = dt,
    dat_validation = val,
    algorithm = "glm",
    verbosity = 0L
  )
  mod <- train(cfg)
  expect_true(S7_inherits(mod, Supervised))
  expect_length(mod@predicted_validation, nrow(val))
})


================================================
FILE: tests/testthat/test_Supervised.R
================================================
# test_Supervised.R
# ::rtemis::
# EDG rtemis.org

# Key
# {Algorithm}[method]<Class> Further conditions

# Note
# We are using very small and simple datasets to reduce runtime.
# GLM models are expected to give warnings, including:
#   - "glm.fit: fitted probabilities numerically 0 or 1 occurred"
#   - "glm.fit: algorithm did not converge"

# %% Packages ----
library(data.table)

# Data ----
## Regression Data ----
n <- 400
x <- rnormmat(n, 5, seed = 2025)
g <- factor(sample(c("A", "B"), n, replace = TRUE))
y <- x[, 3] + x[, 5] + ifelse(g == "A", 2, -1) + rnorm(n)
datr <- data.table(x, g, y)
resr <- resample(datr)
datr_train <- datr[resr$Fold_1, ]
datr_test <- datr[-resr$Fold_1, ]

## Classification Data ----
### Binary ----
datc2 <- data.frame(
  gn = factor(sample(c("alpha", "beta", "gamma"), 100, replace = TRUE)),
  iris[51:150, ]
)
datc2$Species <- factor(datc2$Species)
resc2 <- resample(datc2)
datc2_train <- datc2[resc2$Fold_1, ]
datc2_test <- datc2[-resc2$Fold_1, ]

### 3-class ----
datc3 <- iris
resc3 <- resample(datc3)
datc3_train <- datc3[resc3$Fold_1, ]
datc3_test <- datc3[-resc3$Fold_1, ]

### Synthetic binary data where positive class is 10% of the data ----
# set.seed(2025)
# n <- 500
# datc2 <- data.frame(
#   x1 = rnorm(n),
#   x2 = rnorm(n),
#   x3 = rnorm(n),
#   g = factor(sample(c("A", "B"), n, replace = TRUE, prob = c(.1, .9)))
# )
# Binary outcome dependent on x2 and g, with levels "neg" and "pos", where "pos" is 10% of the data
# datc2$y <- factor(ifelse(datc2$x2 > 0 & datc2$g == "A", "pos", "neg"))
# resc2 <- resample(datc2)
# datc2_train <- datc2[resc2$Fold_1, ]
# datc2_test <- datc2[-resc2$Fold_1, ]

# Utils ----
test_that("class_imbalance() works", {
  expect_type(class_imbalance(outcome(datc2)), "double")
})

# --- GLM ------------------------------------------------------------------------------------------
## {GLM}[train]<Regression> ----
mod_r_glm <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "glm"
)
test_that("train() GLM Regression succeeds", {
  expect_s7_class(mod_r_glm, Regression)
})
test_that("train() GLM standard errors are available", {
  expect_type(mod_r_glm@se_training, "double")
  expect_type(mod_r_glm@se_test, "double")
})

## {GLM}[train]<Regression> Throw error with missing data ----
datr_train_na <- datr_train
datr_train_na[10:2, 1] <- NA
test_that("train() GLM Regression with missing data throws error", {
  expect_error(
    train(x = datr_train_na, dat_test = datr_test, algorithm = "glm")
  )
})

## {GLM}[predict]<Regression> ----
predicted <- predict(mod_r_glm, features(datr_test))
test_that("predict() GLM Regression succeeds", {
  expect_identical(mod_r_glm@predicted_test, predicted)
  expect_null(dim(predicted))
})

## {GLM}[train]<RegressionRes> ----
resmod_r_glm <- train(
  x = datr,
  algorithm = "glm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)
test_that("train() Res GLM Regression succeeds", {
  expect_s7_class(resmod_r_glm, RegressionRes)
})

## {GLM}[train]<Classification> ----
mod_c_glm <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "glm"
)
test_that("train() GLM Classification succeeds", {
  expect_s7_class(mod_c_glm, Classification)
})

## {GLM}[train]<Classification> IFW ----
mod_c_glm_ifw <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "glm",
  hyperparameters = setup_GLM(ifw = TRUE)
)
test_that("train() GLM Classification with IFW succeeds", {
  expect_s7_class(mod_c_glm_ifw, Classification)
})

## {GLM}[train]<ClassificationRes> ----
resmod_c_glm <- train(
  x = datc2,
  algorithm = "glm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)
test_that("train() GLM ClassificationRes succeeds", {
  expect_s7_class(resmod_c_glm, ClassificationRes)
})

# --- GLMNET ---------------------------------------------------------------------------------------

## {GLMNET}[train]<Regression> ----
mod_r_glmnet <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "glmnet",
  hyperparameters = setup_GLMNET(lambda = 0.01)
)
test_that("train() GLMNET Regression with fixed lambda succeeds", {
  expect_s7_class(mod_r_glmnet, Regression)
})

## {GLMNET}[predict]<Regression> ----
predicted <- predict(mod_r_glmnet, features(datr_test))
test_that("predict() GLMNET Regression succeeds", {
  expect_identical(mod_r_glmnet@predicted_test, predicted)
  expect_null(dim(predicted))
})

## {GLMNET}[train]<Regression> auto-lambda grid search using future ----
test_that(
  paste(
    "train > tune_GridSearch resets future plan after execution",
    "train() GLMNET Regression with auto-lambda grid search using future succeeds"
  ),
  {
    # for local testing only, can't assume multisession or multicore are available
    skip_if_not_installed("futurize")
    # Simulate user has set plan to multisession with 2 workers
    # with(future::plan("multisession", workers = 2L), local = TRUE)
    # Simulate user has set plan to sequential
    with(future::plan("sequential"), local = TRUE)
    # Run train with multicore and 4 workers
    modt_r_glmnet <- train(
      x = datr_train,
      dat_test = datr_test,
      algorithm = "glmnet",
      hyperparameters = setup_GLMNET(alpha = 1),
      execution_config = setup_ExecutionConfig(
        backend = "future",
        n_workers = 2L, # Limit to 2 workers for CRAN
        future_plan = "mirai_multisession" # which gets converted to "future.mirai::mirai_multisession"
      ),
      verbosity = 2L
    )
    # Check that model trained correctly
    expect_s7_class(modt_r_glmnet, Regression)
    # Check that future plan has been reset to "multisession" with 2 workers
    # expect_equal(rtemis:::identify_plan(), "multisession")
    # Check that future plan has been reset to "sequential"
    expect_equal(rtemis:::identify_plan(), "sequential")
    expect_equal(future::nbrOfWorkers(), 1L)
  }
)


## {GLMNET}[train]<Regression> /\Error sequential with >1 worker ----
test_that("sequential with >1 worker throws error", {
  skip_if_not_installed("futurize")
  expect_error(
    modt_r_glmnet <- train(
      x = datr_train,
      dat_test = datr_test,
      algorithm = "glmnet",
      hyperparameters = setup_GLMNET(alpha = 1),
      execution_config = setup_ExecutionConfig(
        backend = "future",
        future_plan = "sequential",
        n_workers = 2L
      )
    )
  )
})

## {GLMNET}[train]<Regression> auto-lambda grid search using mirai ----
test_that("train() GLMNET Regression with auto-lambda grid search using mirai succeeds", {
  skip_if_not_installed("mirai")
  modt_r_glmnet <- train(
    x = datr_train,
    dat_test = datr_test,
    algorithm = "glmnet",
    hyperparameters = setup_GLMNET(alpha = 1),
    execution_config = setup_ExecutionConfig(backend = "mirai", n_workers = 2L)
  )
  expect_s7_class(modt_r_glmnet, Regression)
})

## {GLMNET}[train]<Regression> auto-lambda + alpha grid search ----
test_that("train() GLMNET Regression with auto-lambda + alpha grid search succeeds", {
  modt_r_glmnet <- train(
    x = datr_train,
    dat_test = datr_test,
    algorithm = "glmnet",
    hyperparameters = setup_GLMNET(alpha = c(0, 1)),
    execution_config = setup_ExecutionConfig(backend = "none")
  )
  expect_s7_class(modt_r_glmnet, Regression)
})

## {GLMNET}[train]<RegressionRes> auto-lambda + alpha grid search ----
test_that("train() Res-GLMNET Regression with auto-lambda + alpha grid search succeeds", {
  resmodt_r_glmnet <- train(
    x = datr_train,
    algorithm = "glmnet",
    hyperparameters = setup_GLMNET(alpha = c(0.5, 1)),
    outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
    execution_config = setup_ExecutionConfig(backend = "none")
  )
  expect_s7_class(resmodt_r_glmnet, RegressionRes)
})

## {GLMNET}[train]<Classification> ----
modt_c_glmnet <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_GLMNET(alpha = 1, lambda = 0.01)
)
test_that("train() GLMNET Classification succeeds", {
  expect_s7_class(modt_c_glmnet, Classification)
})

## {GLMNET}[train]<Classification> Multiclass ----
test_that("train() GLMNET Multiclass Classification succeeds", {
  modt_c3_glmnet <- train(
    x = datc3_train,
    dat_test = datc3_test,
    hyperparameters = setup_GLMNET(alpha = 1),
    execution_config = setup_ExecutionConfig(backend = "none")
  )
  expect_s7_class(modt_c3_glmnet, Classification)
})

# --- GAM ------------------------------------------------------------------------------------------
## {GAM}[train]<Regression> spline & parametric ----
mod_r_gam <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "gam"
)
test_that("train() GAM Regression with spline + parametric terms succeeds.", {
  expect_s7_class(mod_r_gam, Regression)
})

## {GAM}[train]<Regression> spline only ----
mod_r_gam <- train(
  x = datr_train[, -6],
  dat_test = datr_test[, -6],
  algorithm = "gam"
)
test_that("train() GAM Regression with only spline terms succeeds.", {
  expect_s7_class(mod_r_gam, Regression)
})

## {GAM}[train]<Regression> parametric only ----
mod_r_gam <- train(
  x = datr_train[, 6:7],
  dat_test = datr_test[, 6:7],
  algorithm = "gam"
)
test_that("train() GAM Regression with only parametric terms succeeds.", {
  expect_s7_class(mod_r_gam, Regression)
})

## {GAM}[train]<Regression> grid search ----
modt_r_gam <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "gam",
  hyperparameters = setup_GAM(k = c(3, 5, 7))
)
test_that("train() GAM Regression with grid_search() succeeds", {
  expect_s7_class(modt_r_gam, Regression)
})

## {GAM}[predict]<Regression> ----
test_that("predict() GAM Regression works", {
  expect_error(predicted <- predict(modt_r_gam, datr_test))
  predicted <- predict(modt_r_gam, features(datr_test))
  expect_identical(modt_r_gam@predicted_test, predicted)
})

## {GAM}[train]<RegressionRes> ----
resmod_r_gam <- train(
  x = datr,
  algorithm = "gam",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)

## {GAM}[train]<Classification> ----
mod_c_gam <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "gam"
)
test_that("train() GAM Classification succeeds", {
  expect_s7_class(mod_c_gam, Classification)
})

## {GAM}[train]<Classification> IFW ----
mod_c_gam_ifw <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "gam",
  hyperparameters = setup_GAM(ifw = TRUE)
)
test_that("train() GAM Classification with IFW succeeds", {
  expect_s7_class(mod_c_gam_ifw, Classification)
})

# --- LinearSVM ------------------------------------------------------------------------------------
## {LinearSVM}[train]<Regression> ----
mod_r_svml <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_LinearSVM()
)
test_that("train() LinearSVM Regression succeeds", {
  expect_s7_class(mod_r_svml, Regression)
})

## {LinearSVM}[train]<Regression> Tuned ----
modt_r_svml <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_LinearSVM(cost = c(1, 10)),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() LinearSVM Regression with tuning succeeds", {
  expect_s7_class(modt_r_svml, Regression)
})

## {LinearSVM}[train]<RegressionRes> ----
resmod_r_svml <- train(
  x = datr,
  algorithm = "linearsvm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)
test_that("train() Res LinearSVM Regression succeeds", {
  expect_s7_class(resmod_r_svml, RegressionRes)
})

## {LinearSVM}[train]<Classification> ----
mod_c_linearsvm <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "linearsvm"
)
test_that("train() LinearSVM Classification succeeds", {
  expect_s7_class(mod_c_linearsvm, Classification)
})

## {LinearSVM}[train]<Classification> Multiclass ----
mod_c3_linearsvm <- train(
  x = datc3_train,
  dat_test = datc3_test,
  algorithm = "linearsvm"
)
test_that("train() LinearSVM Multiclass Classification succeeds", {
  expect_s7_class(mod_c3_linearsvm, Classification)
})

## {LinearSVM}[train]<ClassificationRes> ----
resmod_c_linearsvm <- train(
  x = datc2,
  algorithm = "linearsvm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res LinearSVM Classification succeeds", {
  expect_s7_class(resmod_c_linearsvm, ClassificationRes)
})

# --- RadialSVM ------------------------------------------------------------------------------------
## {RadialSVM}[train]<Regression> ----
mod_r_svmr <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_RadialSVM()
)
test_that("train() RadialSVM Regression succeeds", {
  expect_s7_class(mod_r_svmr, Regression)
})

## {RadialSVM}[train]<Regression> Tuned ----
modt_r_svmr <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_RadialSVM(cost = c(1, 10, 100))
)
test_that("train() RadialSVM Regression with tuning succeeds", {
  expect_s7_class(modt_r_svmr, Regression)
})

## {RadialSVM}[train]<RegressionRes> ----
resmod_r_svmr <- train(
  x = datr,
  algorithm = "radialsvm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res RadialSVM Regression succeeds", {
  expect_s7_class(resmod_r_svmr, RegressionRes)
})

## {RadialSVM}[train]<RegressionRes> Tuned ----
resmodt_r_svmr <- train(
  x = datr,
  hyperparameters = setup_RadialSVM(cost = c(1, 10)),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res RadialSVM Regression with tuning succeeds", {
  expect_s7_class(resmodt_r_svmr, RegressionRes)
})

## {RadialSVM}[train]<Classification> ----
mod_c_radialsvm <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "radialsvm"
)
test_that("train() RadialSVM Classification succeeds", {
  expect_s7_class(mod_c_radialsvm, Classification)
})

## {RadialSVM}[train]<Classification> Tuned ----
modt_c_radialsvm <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_RadialSVM(cost = c(1, 10)),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() RadialSVM Classification with tuning succeeds", {
  expect_s7_class(modt_c_radialsvm, Classification)
})

## {RadialSVM}[train]<ClassificationRes> ----
resmod_c_radialsvm <- train(
  x = datc2,
  algorithm = "radialsvm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res RadialSVM Classification succeeds", {
  expect_s7_class(resmod_c_radialsvm, ClassificationRes)
})

## {RadialSVM}[train]<ClassificationRes> Tuned ----
resmodt_c_radialsvm <- train(
  x = datc2,
  hyperparameters = setup_RadialSVM(cost = c(1, 10)),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res RadialSVM Classification with tuning succeeds", {
  expect_s7_class(resmodt_c_radialsvm, ClassificationRes)
})

## {RadialSVM}[train]<Classification> Multiclass ----
modt_c3_radialsvm <- train(
  x = datc3_train,
  dat_test = datc3_test,
  hyperparameters = setup_RadialSVM()
)
test_that("train() RadialSVM Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_radialsvm, Classification)
})

# --- CART -----------------------------------------------------------------------------------------
## {CART}[train]<Regression> ----
mod_r_cart <- train(
  datr_train,
  dat_test = datr_test,
  algorithm = "cart"
)
test_that("train() Regression succeeds", {
  expect_s7_class(mod_r_cart, Regression)
})

## {CART}[train]<Regression> Grid search ----
## {CART} Check tuned == 0----
hyperparameters <- setup_CART(
  maxdepth = c(1, 2, 10),
  minbucket = c(1L, 4L)
)
test_that("tuned field is set correctly", {
  expect_identical(hyperparameters@tuned, 0L)
})

modt_r_cart <- train(
  datr_train,
  dat_test = datr_test,
  hyperparameters = setup_CART(maxdepth = 2:3),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Regression with grid_search() succeeds", {
  expect_s7_class(modt_r_cart, Regression)
})

## {CART} Check tuned == 1----
test_that("tuned is set correctly", {
  expect_identical(modt_r_cart@hyperparameters@tuned, 1L)
})

## {CART}[train]<RegressionRes> ----
resmod_r_cart <- train(
  x = datr,
  hyperparameters = setup_CART(),
  outer_resampling_config = setup_Resampler(3L),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() RegressionRes succeeds", {
  expect_s7_class(resmod_r_cart, RegressionRes)
})

## {CART}[train]<RegressionRes> Tuned ----
resmodt_r_cart <- train(
  x = datr,
  hyperparameters = setup_CART(maxdepth = 1:2, prune_cp = c(.001, .01)),
  outer_resampling_config = setup_Resampler(3),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() RegressionRes succeeds", {
  expect_s7_class(resmodt_r_cart, RegressionRes)
})

## {CART}[train]<RegressionRes> prune_cp ----
resmod_r_cart <- train(
  x = datr,
  hyperparameters = setup_CART(prune_cp = c(.001, .01)),
  outer_resampling_config = setup_Resampler(3L),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() RegressionRes succeeds", {
  expect_s7_class(resmod_r_cart, RegressionRes)
})

## {CART}[train]<Classification> ----
# model <- train_CART(dat_training = datc2_train, dat_test = datc2_test)
# model$method #"class"
modt_c_cart <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "cart",
  hyperparameters = setup_CART(maxdepth = 1:2)
)
test_that("train() CART Classification succeeds", {
  expect_s7_class(modt_c_cart, Classification)
})

## {CART}[train]<Classification> IFW ----
mod_c_cart_ifw <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "cart",
  hyperparameters = setup_CART(
    ifw = TRUE
  )
)
test_that("train() CART Classification with IFW succeeds", {
  expect_s7_class(mod_c_cart_ifw, Classification)
})

## {CART}[train]<Classification> Grid search ----
modt_c_cart_tuned <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_CART(
    maxdepth = c(1L, 2L)
  ),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Classification with grid_search() succeeds", {
  expect_s7_class(modt_c_cart_tuned, Classification)
})

## {CART}[train]<ClassificationRes> ----
# Can be used to test different parallelization methods during tuning
resmodt_c_cart <- train(
  x = datc2,
  algorithm = "cart",
  hyperparameters = setup_CART(
    maxdepth = c(1L, 2L)
  ),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() CART ClassificationRes succeeds", {
  expect_s7_class(resmodt_c_cart, ClassificationRes)
})

## {CART}[train]<Classification> Multiclass ----
modt_c3_cart <- train(
  x = datc3_train,
  dat_test = datc3_test,
  algorithm = "cart"
)
test_that("train() CART Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_cart, Classification)
})

# --- LightCART ------------------------------------------------------------------------------------
## {LightCART}[train]<Regression> ----
mod_r_lightcart <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "lightcart"
)
test_that("train() LightCART Regression succeeds", {
  expect_s7_class(mod_r_lightcart, Regression)
})

mod_r_lightcartlin <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "lightcart",
  hyperparameters = setup_LightCART(
    linear_tree = TRUE
  )
)
test_that("train() LightCART Regression with linear_tree succeeds", {
  expect_s7_class(mod_r_lightcartlin, Regression)
  expect_identical(
    mod_r_lightcartlin@hyperparameters$linear_tree,
    mod_r_lightcartlin@model$params$linear_tree
  )
})

## {LightCART}[train]<Classification> ----
mod_c_lightcart <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "lightcart"
)
test_that("train() LightCART Classification succeeds", {
  expect_s7_class(mod_c_lightcart, Classification)
})

## {LightCART}[train]<Classification> Multiclass ----
modt_c3_lightcart <- train(
  x = datc3_train,
  dat_test = datc3_test,
  algorithm = "lightcart"
)
test_that("train() LightCART Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_lightcart, Classification)
})

# --- LightRF --------------------------------------------------------------------------------------
## {LightRF}[train]<Regression> ----
mod_r_lightrf <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "lightrf",
  hyperparameters = setup_LightRF(
    nrounds = 20L,
    lambda_l1 = .1,
    lambda_l2 = .1
  )
)
test_that("train() LightRF Regression with l1, l2 succeeds", {
  expect_s7_class(mod_r_lightrf, Regression)
})

## {LightRF}[predict]<Regression> ----
predicted <- predict(mod_r_lightrf, features(datr_test))
test_that("predict() LightRF Regression succeeds", {
  expect_identical(mod_r_lightrf@predicted_test, predicted)
  expect_null(dim(predicted))
})

## {LightRF}[train]<Regression> Grid search ----
modt_r_lightrf <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "lightrf",
  hyperparameters = setup_LightRF(
    nrounds = 20L,
    lambda_l1 = c(0, .1)
  ),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() LightRF Regression with l1 tuning succeeds", {
  expect_s7_class(modt_r_lightrf, Regression)
})

## {LightRF}[train]<RegressionRes> ----
resmodt_r_lightrf <- train(
  x = datr,
  algorithm = "lightrf",
  hyperparameters = setup_LightRF(
    nrounds = 20L,
    lambda_l1 = c(0, 10)
  ),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res LightRF Regression with l1 tuning succeeds", {
  expect_s7_class(resmodt_r_lightrf, RegressionRes)
})

## {LightRF}[train]<Classification> ----
mod_c_lightrf <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_LightRF(nrounds = 20L)
)
test_that("train() LightRF Binary Classification succeeds", {
  expect_s7_class(mod_c_lightrf, Classification)
})

## {LightRF}[predict]<Classification> ----
predicted_prob_test <- predict(mod_c_lightrf, features(datc2_test))
test_that("predict() LightRF Classification succeeds", {
  expect_identical(mod_c_lightrf@predicted_prob_test, predicted_prob_test)
})

## {LightRF}[train]<Classification> Tuned ----
modt_c_lightrf <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_LightRF(nrounds = 20L, max_depth = c(-1, 5))
)
test_that("train() LightRF Binary Classification with tuning succeeds", {
  expect_s7_class(modt_c_lightrf, Classification)
})

## {LightRF}[train]<ClassificationRes> ----
resmod_c_lightrf <- train(
  x = datc2,
  hyperparameters = setup_LightRF(nrounds = 20L),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() LightRF ClassificationRes succeeds", {
  expect_s7_class(resmod_c_lightrf, ClassificationRes)
})

## {LightRF}[train]<Classification> Multiclass ----
modt_c3_lightrf <- train(
  x = datc3_train,
  dat_test = datc3_test,
  hyperparameters = setup_LightRF(nrounds = 20L)
)
test_that("train() LightRF Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_lightrf, Classification)
})

## {LightGBM}[train]<Regression> ----
mod_r_lightgbm <- train(
  x = datr_train,
  dat_test = datr_test,
  algorithm = "lightgbm",
  hyperparameters = setup_LightGBM(
    force_nrounds = 50
  )
)
test_that("train() LightGBM Regression succeeds", {
  expect_s7_class(mod_r_lightgbm, Regression)
})

## {LightGBM}[train]<Regression> Autotune nrounds ----
modt_r_lightgbm <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_LightGBM()
)
test_that("train() LightGBM Regression with autotune nrounds succeeds", {
  expect_s7_class(modt_r_lightgbm, Regression)
})

## {LightGBM}[train]<RegressionRes> Autotune nrounds ----
resmodt_r_lightgbm <- train(
  x = datr_train,
  hyperparameters = setup_LightGBM(max_nrounds = 50L),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res LightGBM Regression with autotune nrounds succeeds", {
  expect_s7_class(resmodt_r_lightgbm, RegressionRes)
})

## {LightGBM}[train]<Classification> ----
mod_c_lightgbm <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "lightgbm",
  # hyperparameters = setup_LightGBM(
  #   force_nrounds = 100L
  # ),
  tuner_config = setup_GridSearch(
    resampler_config = setup_Resampler(
      n_resamples = 3L,
      type = "KFold"
    )
  ),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() LightGBM Classification succeeds", {
  expect_s7_class(mod_c_lightgbm, Classification)
})

## {LightGBM}[train]<Classification> Multiclass ----
modt_c3_lightgbm <- train(
  x = datc3_train,
  dat_test = datc3_test,
  hyperparameters = setup_LightGBM(
    force_nrounds = 20L
  )
)
test_that("train() LightGBM Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_lightgbm, Classification)
})

## {LightRuleFit}[train]<Regression> ----
mod_r_lightrlft_l1l2 <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_LightRuleFit(
    nrounds = 50L,
    lambda_l1 = 10,
    lambda_l2 = 10,
    lambda = 0.01
  )
)

test_that("train() LightRuleFit Regression with l1, l2 params passed", {
  expect_s7_class(mod_r_lightrlft_l1l2, Regression)
  expect_identical(
    mod_r_lightrlft_l1l2@model@model_lightgbm@model$params$lambda_l1,
    10
  )
  expect_identical(
    mod_r_lightrlft_l1l2@model@model_lightgbm@model$params$lambda_l2,
    10
  )
})

## {LightRuleFit}[train]<Classification> ----
mod_c_lightrlft <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_LightRuleFit(nrounds = 50L),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() LightRuleFit Binary Classification succeeds", {
  expect_s7_class(mod_c_lightrlft, Classification)
})

## {TabNet}[train]<Regression> ----
# Test if lantern is installed
if (torch::torch_is_installed()) {
  mod_r_tabnet <- train(
    x = datr_train,
    dat_test = datr_test,
    algorithm = "tabnet",
    hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01)
  )
  test_that("train() TabNet Regression succeeds", {
    expect_s7_class(mod_r_tabnet, Regression)
  })
}

## {TabNet}[train]<Classification> ----
if (torch::torch_is_installed()) {
  mod_c_tabnet <- train(
    x = datc2_train,
    dat_test = datc2_test,
    hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01)
  )
  test_that("train() TabNet Classification succeeds", {
    expect_s7_class(mod_c_tabnet, Classification)
  })
}

## {TabNet}[train]<Classification> Multiclass ----
if (torch::torch_is_installed()) {
  modt_c3_tabnet <- train(
    x = datc3_train,
    dat_test = datc3_test,
    hyperparameters = setup_TabNet(epochs = 3L, learn_rate = .01)
  )
  test_that("train() TabNet Multiclass Classification succeeds", {
    expect_s7_class(modt_c3_tabnet, Classification)
  })
}

## {Isotonic}[train]<Regression> ----
x <- rnorm(50)
y <- x^5 + rnorm(50)
dat <- data.table(x, y)
mod_iso <- train(dat, algorithm = "Isotonic")
test_that("train() Isotonic Regression succeeds", {
  expect_s7_class(mod_iso, Regression)
})

## {Isotonic}[train]<Classification> ----
set.seed(2025)
x <- rnorm(200)
y <- factor(ifelse(x > mean(x), "b", "a"))
x <- x + rnorm(200) / 3
dat <- data.frame(x, y)
cmod_iso <- train(dat, algorithm = "Isotonic")
test_that("train() Isotonic Classification succeeds", {
  expect_s7_class(cmod_iso, Classification)
})

# --- Ranger ---------------------------------------------------------------------------------------
## {Ranger}[train]<Regression> ----
mod_r_ranger <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_Ranger(num_trees = 50L)
)
test_that("train() Ranger Regression succeeds", {
  expect_s7_class(mod_r_ranger, Regression)
})

## {Ranger}[train]<Regression> Grid search ----
modt_r_ranger <- train(
  x = datr_train,
  dat_test = datr_test,
  hyperparameters = setup_Ranger(num_trees = 50L, mtry = c(3, 6)),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Ranger Regression with grid search succeeds", {
  expect_s7_class(modt_r_ranger, Regression)
})

## {Ranger}[train]<RegressionRes> ----
resmod_r_ranger <- train(
  x = datr,
  hyperparameters = setup_Ranger(num_trees = 5000L),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)
test_that("train() Res Ranger Regression succeeds", {
  expect_s7_class(resmod_r_ranger, RegressionRes)
})

## {Ranger}[train]<Classification> ----
mod_c_ranger <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_Ranger(num_trees = 10L)
)
test_that("train() Ranger Classification succeeds", {
  expect_s7_class(mod_c_ranger, Classification)
})

## {Ranger}[train]<Classification> Grid search ----
modt_c_ranger <- train(
  x = datc2_train,
  dat_test = datc2_test,
  hyperparameters = setup_Ranger(num_trees = 10L, mtry = c(2, 4)),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Ranger Classification with grid search succeeds", {
  expect_s7_class(modt_c_ranger, Classification)
})

## {Ranger}[train]<ClassificationRes> ----
resmod_c_ranger <- train(
  x = datc2,
  hyperparameters = setup_Ranger(num_trees = 10L),
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
  execution_config = setup_ExecutionConfig(backend = "none")
)
test_that("train() Res Ranger Classification succeeds", {
  expect_s7_class(resmod_c_ranger, ClassificationRes)
})

## {Ranger}[train]<Classification> Multiclass ----
modt_c3_ranger <- train(
  x = datc3_train,
  dat_test = datc3_test,
  hyperparameters = setup_Ranger(num_trees = 10L)
)
test_that("train() Ranger Multiclass Classification succeeds", {
  expect_s7_class(modt_c3_ranger, Classification)
})

# --- Predict SupervisedRes ------------------------------------------------------------------------

## {CART}[predict]<RegressionRes> ----
predicted_mean <- predict(resmod_r_cart, newdata = features(datr_test))
test_that("predict() SupervisedRes succeeds", {
  expect_true(length(predicted_mean) == nrow(datr_test))
})


# --- Calibration ----------------------------------------------------------------------------------
## {LightRF}[calibrate]<Classification> ----
# Calibrate mod_c_lightrf trained above
model <- mod_c_lightrf
predicted_probabilities <- model$predicted_prob_training
true_labels <- model$y_training
mod_c_lightrf_cal <- calibrate(
  mod_c_lightrf,
  predicted_probabilities = mod_c_lightrf$predicted_prob_training,
  true_labels = mod_c_lightrf$y_training
)
test_that("calibrate() succeeds on Classification", {
  expect_s7_class(mod_c_lightrf_cal, CalibratedClassification)
})

## {LightRF}[predict]<CalibratedClassification> ----
newdata <- features(datc2_test)
predicted_prob_test_cal <- predict(mod_c_lightrf_cal, newdata = newdata)
test_that("predict() CalibratedClassification succeeds", {
  expect_identical(
    mod_c_lightrf_cal@predicted_prob_test_calibrated,
    predicted_prob_test_cal
  )
})

# --- CalibratedClassificationRes ------------------------------------------------------------------
## {LightRF}[calibrate]<ClassificationRes>
resmod_c_lightrf_cal <- calibrate(resmod_c_lightrf)
test_that("calibrate() succeeds on ClassificationRes", {
  expect_s7_class(resmod_c_lightrf_cal, CalibratedClassificationRes)
})

## {GLM}[describe]<Regression> ----
test_that("describe.Regression returns character", {
  desc <- describe(mod_r_glm)
  expect_type(desc, "character")
})

## {GLM}[plot_true_pred]<Supervised> ----
test_that("plot_true_pred.Supervised creates a plotly object", {
  p <- plot_true_pred(mod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_true_pred]<Regression> ----
test_that("plot_true_pred creates a plotly object", {
  p <- plot_true_pred(mod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[present]<Supervised> ----
test_that("present.Supervised creates a plotly object", {
  p <- present(mod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[describe]<Classification> ----
test_that("describe.Classification returns character", {
  desc <- describe(mod_c_glm)
  expect_type(desc, "character")
})

## {GLM}[plot_true_pred]<Classification> ----
test_that("plot_true_pred.Classification creates a plotly object", {
  p <- plot_true_pred(mod_c_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_true_pred]<Classification> ----
test_that("plot_true_pred creates a plotly object", {
  p <- plot_true_pred(mod_c_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[draw_roc]<Classification> ----
test_that("draw_roc creates a plotly object", {
  p <- draw_roc(
    true_labels = list(
      Training = mod_c_glm@y_training,
      Test = mod_c_glm@y_test
    ),
    predicted_prob = list(
      Training = mod_c_glm@predicted_prob_training,
      Test = mod_c_glm@predicted_prob_test
    )
  )
  expect_s3_class(p, "plotly")
})
test_that("plot_roc.Classification creates a plotly object", {
  p <- plot_roc(mod_c_glm)
  expect_s3_class(p, "plotly")
})

## {CART}[plot_roc]<ClassificationRes> Tuned ----
test_that("plot_roc.ClassificationRes creates a plotly object", {
  p <- plot_roc(resmodt_c_cart)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_metric]<SupervisedRes> ----
test_that("plot_metric.SupervisedRes creates a plotly object", {
  p <- plot_metric(resmod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_metric]<SupervisedRes> ----
test_that("plot_metric.SupervisedRes creates a plotly object", {
  p <- plot_metric(resmod_c_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_true_pred]<RegressionRes> ----
test_that("plot_true_pred RegressionRes creates a plotly object", {
  p <- plot_true_pred(resmod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[plot_true_pred]<ClassificationRes> ----
test_that("plot_true_pred ClassificationRes creates a plotly object", {
  p <- plot_true_pred(resmod_c_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[present]<Supervised> ----
test_that("present.Supervised creates a plotly object", {
  p <- present(mod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[present]<Supervised> ----
test_that("present.Supervised creates a plotly object", {
  p <- present(mod_c_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[present]<RegressionRes> ----
test_that("present() RegressionRes object creates a plotly object", {
  p <- present(resmod_r_glm)
  expect_s3_class(p, "plotly")
})

## {GLM}[present]<ClassificationRes> ----
test_that("present() ClassificationRes object creates a plotly object", {
  p <- present(resmod_c_glm)
  expect_s3_class(p, "plotly")
})

## {Multi}[present]<RegressionRes> ----
test_that("present() multiple RegressionRes objects creates a plotly object", {
  p <- present(list(resmod_r_glm, resmod_r_cart))
  expect_s3_class(p, "plotly")
})

## {Multi}[present]<ClassificationRes> ----
test_that("present() multiple ClassificationRes objects creates a plotly object", {
  p <- present(list(resmod_c_glm, resmodt_c_cart))
  expect_s3_class(p, "plotly")
})

## {Multi}[present]<Regression> ----
test_that("present() multiple Regression objects creates a plotly object", {
  p <- present(list(mod_r_glm, mod_r_cart))
  expect_s3_class(p, "plotly")
})

## {CART}[plot_varimp]<RegressionRes> ----
test_that("plot_varimp RegressionRes creates a plotly object", {
  p <- plot_varimp(resmod_r_cart)
  expect_s3_class(p, "plotly")
})

## {GLM}[train]<Supervised> Outdir ----
test_that("train saves model to rds successfully", {
  temp_dir <- withr::local_tempdir()
  outdir <- file.path(temp_dir, "mod_r_glm")

  mod_r_glm <- train(
    x = datr_train,
    dat_test = datr_test,
    algorithm = "glm",
    outdir = outdir
  )
  expect_true(file.exists(file.path(outdir, "train_GLM.rds")))
})

## {GLM}[train]<SupervisedRes> Outdir ----
test_that("train saves SupervisedRes model to rds successfully", {
  temp_dir <- withr::local_tempdir()
  outdir <- file.path(temp_dir, "resmod_r_glm")
  resmod_r_glm <- train(
    x = datr,
    algorithm = "glm",
    outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold"),
    outdir = outdir
  )
  expect_true(file.exists(file.path(outdir, "train_GLM.rds")))
})

## {CART}[repr]<Classification> Tuned ----
modt_c_cart_repr <- repr(modt_c_cart, output_type = "ansi")
test_that("repr() Tuned Classification succeeds", {
  expect_type(modt_c_cart_repr, "character")
})

## {CART}[repr]<ClassificationRes> Tuned ----
resmodt_c_cart_repr <- repr(resmodt_c_cart, output_type = "ansi")
test_that("repr() Tuned ClassificationRes succeeds", {
  expect_type(resmodt_c_cart_repr, "character")
})

## {CART}[repr]<Regression> Tuned ----
modt_r_cart_repr <- repr(modt_r_cart, output_type = "ansi")
test_that("repr() Tuned Regression succeeds", {
  expect_type(modt_r_cart_repr, "character")
})

## {CART}[repr]<RegressionRes> Tuned ----
resmodt_r_cart_repr <- repr(resmodt_r_cart, output_type = "ansi")
test_that("repr() Tuned RegressionRes succeeds", {
  expect_type(resmodt_r_cart_repr, "character")
})

# --- Describe & present list of Supervised --------------------------------------------------------
## {Multi}[describe]<Classification> List ----
x <- list(
  modt_c_cart,
  mod_c_lightrf,
  mod_c_lightgbm
)
out <- describe(x)
test_that("describe() list of Classification objects returns character", {
  expect_type(out, "character")
})

## {Multi}[present]<Classification> List ----
plt <- present(x)
test_that("present() list of Classification objects returns plotly object", {
  expect_s3_class(plt, "plotly")
})

## {Multi}[describe]<Regression> List ----
x <- list(
  mod_r_glmnet,
  mod_r_svmr,
  mod_r_lightrf
)
out <- describe(x)
test_that("describe() list of Regression objects returns character", {
  expect_type(out, "character")
})

## {Multi}[present]<Regression> List ----
plt <- present(x)
test_that("present() list of Regression objects returns plotly object", {
  expect_s3_class(plt, "plotly")
})

# Describe & present list of SupervisedRes----

## {Multi}[describe]<ClassificationRes> List ----
x <- list(
  resmod_c_glm,
  resmod_c_linearsvm,
  resmod_c_lightrf
)
out <- describe(x)
test_that("describe() list of ClassificationRes objects returns character", {
  expect_type(out, "character")
})

## {Multi}[present]<ClassificationRes> List ----
plt <- present(x)
test_that("present() list of ClassificationRes objects returns plotly object", {
  expect_s3_class(plt, "plotly")
})

## {Multi}[describe]<RegressionRes> List ----
x <- list(
  resmod_r_glm,
  resmod_r_svml,
  resmodt_r_lightrf
)
out <- describe(x)
test_that("describe() list of RegressionRes objects returns character", {
  expect_type(out, "character")
})

## {Multi}[present]<RegressionRes> List ----
plt <- present(x)
test_that("present() list of RegressionRes objects returns plotly object", {
  expect_s3_class(plt, "plotly")
})

# --- CalibratedClassificationRes ------------------------------------------------------------------
## {GLM}[calibrate]<ClassificationRes> ----
# Using resmod_c_glm from above
resmod_c_glm_cal <- calibrate(resmod_c_glm)
test_that("calibrate() GLM ClassificationRes succeeds", {
  expect_s7_class(resmod_c_glm_cal, CalibratedClassificationRes)
})

## {GLM}[predict]<CalibratedClassificationRes> ----
test_that("predict() GLM CalibratedClassificationRes succeeds", {
  predicted_cal <- predict(resmod_c_glm_cal, features(datc2_test))
  expect_type(predicted_cal, "double")
  expect_length(predicted_cal, nrow(datc2_test))
})

## {CART}[calibrate]<ClassificationRes> ----
# Using resmodt_c_cart from above
resmodt_c_cart_cal <- calibrate(resmodt_c_cart)
test_that("calibrate() CART ClassificationRes succeeds", {
  expect_s7_class(resmodt_c_cart_cal, CalibratedClassificationRes)
})

## {CART}[predict]<CalibratedClassificationRes> ----
test_that("predict() CART CalibratedClassificationRes succeeds", {
  predicted_cal <- predict(resmodt_c_cart_cal, features(datc2_test))
  expect_type(predicted_cal, "double")
  expect_length(predicted_cal, nrow(datc2_test))
})


# %% Test preprocessing in train() is applied to test data in predict() ----
## {GLM}[train]<Classification> Preprocessing ----
mod_c_glm_pp <- train(
  x = datc2_train,
  dat_test = datc2_test,
  algorithm = "glm",
  preprocessor = setup_Preprocessor(
    scale = TRUE,
    center = TRUE
  )
)
test_that("train() with preprocessor creates a model with the preprocessor", {
  expect_s7_class(mod_c_glm_pp, Classification)
  expect_true(!is.null(mod_c_glm_pp@preprocessor))
})


================================================
FILE: tests/testthat/test_Theme.R
================================================
# test_Theme.R
# ::rtemis::
# 2025 EDG rtemis.org

# Theme ----
test_that("Theme succeeds", {
  expect_s7_class(Theme(), Theme)
})

# theme_black ----
test_that("theme_black succeeds", {
  expect_s7_class(theme_black(), Theme)
})

# theme_blackgrid
test_that("theme_blackgrid succeeds", {
  expect_s7_class(theme_blackgrid(), Theme)
})

# theme_light ----
test_that("theme_light succeeds", {
  expect_s7_class(theme_white(), Theme)
})

# Test `$` and `[[` methods ----
theme <- theme_darkgraygrid()
test_that("Theme$ and Theme[[ succeed", {
  expect_equal(theme[["fg"]], "#ffffff")
  expect_equal(theme[["fg"]], theme[["fg"]])
})


================================================
FILE: tests/testthat/test_Tuner.R
================================================
# test_Tuner.R
# ::rtemis::
# 2025 EDG rtemis.org

# Note: Tuning is tested in test_Supervised.R with `train()`

# TunerConfig ----
tn_pr <- setup_GridSearch()
tn_pr
desc(tn_pr)
test_that("TunerConfig succeeds", {
  expect_s7_class(TunerConfig(), TunerConfig)
})

# setup_GridSearch() ----
test_that("setup_GridSearch() succeeds", {
  expect_s7_class(setup_GridSearch(), GridSearchConfig)
})


================================================
FILE: tests/testthat/test_checks.R
================================================
# test_checks.R
# ::rtemis::
# 2025 EDG rtemis.org

# Test do_call ----
test_that("do_call() succeeds", {
  expect_equal(do_call(sum, list(1, 2, 3)), 6)
})


================================================
FILE: tests/testthat/test_colorsystem.R
================================================
# test_colorsystem.R
# ::rtemis::
# 2025 EDG rtemis.org

# show_col ----
x <- list(
  highlight_col = highlight_col,
  col_object = col_object,
  col_outer = col_outer,
  col_tuner = col_tuner,
  col_info = col_info
)

out <- show_col(x, title = "rtemis Color System")
test_that("show_col() works", {
  expect_true(is.character(out))
})

# fmt_gradient ----
out <- fmt_gradient(
  "Supervised",
  colors = c(rtemis_teal, rtemis_light_teal),
  bold = TRUE
)
test_that("fmt_gradient() works", {
  expect_true(is.character(out))
})


================================================
FILE: tests/testthat/test_draw.R
================================================
# test_draw.R
# ::rtemis::
# 2025 EDG rtemis.org

# draw_3Dscatter ----
test_that("draw_3Dscatter creates a plotly object and saves file", {
  # Check whether plotly and kaleido are available in reticulate
  temp_dir <- withr::local_tempdir()
  if (
    !requireNamespace("reticulate", quietly = TRUE) ||
      !reticulate::py_module_available("plotly") ||
      !reticulate::py_module_available("kaleido")
  ) {
    temp_file <- NULL
  } else {
    temp_file <- file.path(temp_dir, "draw_3Dscatter.pdf")
  }

  # Create the plot with file output
  p <- draw_3Dscatter(
    iris,
    group = iris$Species,
    theme = theme_darkgraygrid(),
    filename = temp_file
  )

  # Test that plotly object is created
  expect_s3_class(p, "plotly")

  # Test that file was successfully created by plotly/kaleido (only if temp_file is not NULL)
  if (!is.null(temp_file)) {
    expect_true(file.exists(temp_file))

    # Test that the file has content (not empty)
    file_info <- file.info(temp_file)
    expect_true(file_info$size > 0)

    # Test that it's a valid PDF file (starts with PDF header)
    file_content <- readBin(temp_file, "raw", n = 4)
    expect_equal(rawToChar(file_content), "%PDF")
  }
})

# draw_bar ----
test_that("draw_bar creates a plotly object", {
  p <- draw_bar(VADeaths, legend_xy = c(0, 1))
  expect_s3_class(p, "plotly")
})

# draw_box ----
test_that("draw_box creates a plotly object", {
  p <- draw_box(iris[, 1:4], group = iris[["Species"]], annotate_n = TRUE)
  expect_s3_class(p, "plotly")
})

# draw_calibration ----
test_that("draw_calibration creates a plotly object", {
  # Create a simple binary classification example
  set.seed(123)
  true_labels <- factor(sample(c("A", "B"), size = 100, replace = TRUE))
  predicted_prob <- runif(100)
  p <- draw_calibration(true_labels, predicted_prob)
  expect_s3_class(p, "plotly")
})

# draw_confusion ----
test_that("draw_confusion creates a plotly object", {
  true_labels <- factor(c("a", "a", "a", "b", "b", "b", "b", "b", "b", "b"))
  predicted_labels <- factor(c(
    "a",
    "b",
    "a",
    "b",
    "b",
    "a",
    "b",
    "b",
    "b",
    "a"
  ))
  predicted_prob <- c(0.3, 0.55, 0.45, 0.75, 0.57, 0.3, 0.8, 0.63, 0.62, 0.39)
  metrics <- classification_metrics(
    true_labels,
    predicted_labels,
    predicted_prob
  )
  p <- draw_confusion(metrics)
  expect_s3_class(p, "plotly")
})

# draw_dist ----
test_that("draw_dist creates a plotly object", {
  p <- draw_dist(iris[["Sepal.Length"]], group = iris[["Species"]])
  expect_s3_class(p, "plotly")
})

# draw_heatmap ----
test_that("draw_heatmap creates a plotly object", {
  x <- rnormmat(200, 20)
  xcor <- cor(x)
  p <- draw_heatmap(xcor)
  expect_s3_class(p, "plotly")
})

# draw_leaflet ----
test_that("draw_leaflet creates a leaflet object", {
  fips <- c(06075, 42101)
  population <- c(874961, 1579000)
  names <- c("SF", "Philly")
  p <- draw_leaflet(fips, population, names)
  expect_s3_class(p, "leaflet")
})

# draw_pie ----
test_that("draw_pie creates a plotly object", {
  p <- draw_pie(VADeaths[, 1, drop = FALSE])
  expect_s3_class(p, "plotly")
})

# draw_protein ----
test_that("draw_protein creates a plotly object", {
  tau <- c(
    "M",
    "A",
    "E",
    "P",
    "R",
    "Q",
    "E",
    "F",
    "E",
    "V",
    "M",
    "E",
    "D",
    "H",
    "A",
    "G",
    "T",
    "Y",
    "G",
    "L"
  )
  p <- draw_protein(tau)
  expect_s3_class(p, "plotly")
})

# draw_pvals ----
test_that("draw_pvals creates a plotly object", {
  p <- draw_pvals(
    c(0.01, 0.02, 0.03),
    xnames = c("Feature1", "Feature2", "Feature3")
  )
  expect_s3_class(p, "plotly")
})

# draw_scatter ----
test_that("draw_scatter creates a plotly object", {
  p <- draw_scatter(
    iris[["Sepal.Length"]],
    iris[["Petal.Length"]],
    group = iris[["Species"]],
    fit = "gam",
    se_fit = TRUE
  )
  expect_s3_class(p, "plotly")
})

# draw_spectrogram ----
test_that("draw_spectrogram creates a plotly object", {
  time <- seq(0, 1, length.out = 100)
  freq <- seq(1, 100, length.out = 100)
  power <- outer(time, freq, function(t, f) sin(t) * cos(f))
  p <- draw_spectrogram(
    x = time,
    y = freq,
    z = power
  )
  expect_s3_class(p, "plotly")
})

# draw_survfit ----
test_that("draw_survfit creates a plotly object", {
  data(cancer, package = "survival")
  sf2 <- survival::survfit(survival::Surv(time, status) ~ sex, data = lung)
  p <- draw_survfit(sf2)
  expect_s3_class(p, "plotly")
})

# draw_table ----
test_that("draw_table creates a plotly object", {
  df <- data.frame(
    Name = c("Alice", "Bob", "Charlie"),
    Age = c(25, 30, 35),
    Score = c(90.5, 85.0, 88.0)
  )
  p <- draw_table(
    df,
    main = "Sample Table",
    main_col = "#00b2b2"
  )
  expect_s3_class(p, "plotly")
})

# draw_ts ----
test_that("draw_ts creates a plotly object", {
  time1 <- sample(seq(
    as.Date("2020-03-01"),
    as.Date("2020-07-23"),
    length.out = 100
  ))
  time2 <- sample(seq(
    as.Date("2020-05-01"),
    as.Date("2020-09-23"),
    length.out = 140
  ))
  time <- c(time1, time2)
  x <- c(rnorm(100), rnorm(140, 1, 1.5))
  group <- c(rep("Alpha", 100), rep("Beta", 140))
  p <- draw_ts(x, time, 7, group)
  expect_s3_class(p, "plotly")
})

# draw_varimp ----
test_that("draw_varimp creates a plotly object", {
  x <- rnorm(10)
  names(x) <- paste0("Feature_", seq(x))
  p <- draw_varimp(x)
  expect_s3_class(p, "plotly")
  p_h <- draw_varimp(x, orientation = "h")
  expect_s3_class(p_h, "plotly")
})

# draw_volcano ----
test_that("draw_volcano creates a plotly object", {
  set.seed(2019)
  x <- rnorm(100, mean = 0.5, sd = 2)
  pvals <- runif(100, min = 0, max = 0.1)
  p <- draw_volcano(x, pvals)
  expect_s3_class(p, "plotly")
})

# draw_xt ----
test_that("draw_xt creates a plotly object", {
  datetime <- seq(
    as.POSIXct("2020-01-01 00:00"),
    as.POSIXct("2020-01-02 00:00"),
    by = "hour"
  )
  df <- data.frame(
    datetime = datetime,
    value1 = rnorm(length(datetime)),
    value2 = rnorm(length(datetime))
  )
  p <- draw_xt(df, x = df[, 1], y = df[, 2:3])
  expect_s3_class(p, "plotly")
})


================================================
FILE: tests/testthat/test_idx.R
================================================
# test_idx.R
# ::rtemis::
# 2025 EDG rtemis.org

# Packages ----
library(data.table)

# Data ----
xdf <- iris
xdt <- as.data.table(iris)
idx <- c("Sepal.Length", "Species")
idi <- c(1L, 5L)

# Test inc(data.frame) ----
xdf[, idx, drop = FALSE]
xdf[, idi, drop = FALSE]
inc(xdf, idx)
inc(xdf, idi)
test_that("inc(data.frame) works", {
  expect_equal(xdf[, idx, drop = FALSE], inc(xdf, idx))
  expect_equal(xdf[, idi, drop = FALSE], inc(xdf, idi))
  expect_equal(inc(xdf, idx), inc(xdf, idi))
})

# Test inc(data.table) ----
xdt[, ..idx]
xdt[, idx, with = FALSE]
xdt[, ..idi]
xdt[, idi, with = FALSE]
inc(xdt, idx)
inc(xdt, idi)
test_that("inc(data.table) works", {
  expect_equal(xdt[, ..idx], inc(xdt, idx))
  expect_equal(xdt[, ..idi], inc(xdt, idi))
  expect_equal(inc(xdt, idx), inc(xdt, idi))
})

# Test exc(data.frame) ----
xdf[, -which(names(xdf) %in% idx)]
xdf[, -idi]
exc(xdf, idx)
exc(xdf, idi)
test_that("exc(data.frame) works", {
  expect_equal(xdf[, -which(names(xdf) %in% idx)], exc(xdf, idx))
  expect_equal(xdf[, -idi], exc(xdf, idi))
})

# Test exc(data.table) ----
xdt[, !..idx]
xdt[, !idx, with = FALSE]
xdt[, !..idi]
xdt[, !idi, with = FALSE]
exc(xdt, idx)
test_that("exc(data.table) works", {
  expect_equal(xdt[, !..idx, with = FALSE], exc(xdt, idx))
})


================================================
FILE: tests/testthat/test_massGLM.R
================================================
# test_MassGLM.R
# ::rtemis::
# 2025 EDG rtemis.org

# library(rtemis)
# library(data.table)
# library(testthat)
set.seed(2022)
n <- 40L
y <- data.table(rnormmat(500, n))
x <- data.table(
  x1 = y[[3]] - y[[5]] + y[[14]] + rnorm(500),
  x2 = y[[21]] + rnorm(500)
)

# massGLM ----
massmod <- massGLM(x, y)
test_that("massGLM creates MassGLM object", {
  expect_s7_class(massmod, MassGLM)
})

# plot.MassGLM ----
test_that("plot.MassGLM creates plotly object", {
  plt <- plot(massmod)
  expect_s3_class(plt, "plotly")
})

# plot_manhattan.MassGLM ----
test_that("plot_manhattan.MassGLM creates plotly object", {
  plt <- plot_manhattan(massmod)
  expect_s3_class(plt, "plotly")
})


================================================
FILE: tests/testthat/test_msg_sink.R
================================================
# test_msg_sink.R
# ::rtemis::
# 2026- EDG rtemis.org

# set_msg_sink ----
test_that("set_msg_sink() accepts a function", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(function(m) NULL)
  expect_true(is.function(get_msg_sink()))
})

test_that("set_msg_sink() accepts NULL", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(function(m) NULL)
  set_msg_sink(NULL)
  expect_null(get_msg_sink())
})

test_that("set_msg_sink() rejects non-function, non-NULL inputs", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  expect_error(set_msg_sink("a string"))
  expect_error(set_msg_sink(123))
  expect_error(set_msg_sink(list()))
})

test_that("set_msg_sink() returns previous sink invisibly", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  fn1 <- function(m) NULL
  fn2 <- function(m) NULL
  set_msg_sink(NULL)
  prev1 <- set_msg_sink(fn1)
  expect_null(prev1)
  prev2 <- set_msg_sink(fn2)
  expect_identical(prev2, fn1)
})


# get_msg_sink ----
test_that("get_msg_sink() returns NULL when no sink is set", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(NULL)
  expect_null(get_msg_sink())
})

test_that("get_msg_sink() returns the current sink function", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  fn <- function(m) NULL
  set_msg_sink(fn)
  expect_identical(get_msg_sink(), fn)
})


# with_msg_sink ----
test_that("with_msg_sink() sets and restores", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(NULL)
  with_msg_sink(function(m) NULL, {
    expect_true(is.function(get_msg_sink()))
  })
  expect_null(get_msg_sink())
})

test_that("with_msg_sink() restores even on error", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(NULL)
  expect_error(
    with_msg_sink(function(m) NULL, stop("boom")),
    "boom"
  )
  expect_null(get_msg_sink())
})

test_that("with_msg_sink() preserves a previously set outer sink", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  outer <- function(m) NULL
  inner <- function(m) NULL
  set_msg_sink(outer)
  with_msg_sink(inner, {
    expect_identical(get_msg_sink(), inner)
  })
  expect_identical(get_msg_sink(), outer)
})


# msg() / msg0() / msgstart() / msgdone() routing ----
test_that("msg() emits a console message when no sink is set", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(NULL)
  expect_message(msg("hello"))
})

test_that("msg() routes to the sink and suppresses console output", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      expect_silent(msg("hello world"))
    }
  )
  expect_length(captured, 1L)
  expect_equal(captured[[1L]][["text"]], "hello world")
  expect_equal(captured[[1L]][["level"]], "info")
  expect_true(is.character(captured[[1L]][["ts"]]))
})

test_that("msg0() routes to the sink with sep = ''", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      expect_silent(msg0("hello", "world"))
    }
  )
  expect_length(captured, 1L)
  expect_equal(captured[[1L]][["text"]], "helloworld")
  expect_equal(captured[[1L]][["level"]], "info")
})

test_that("msgstart() routes to the sink with level = 'start'", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      expect_silent(msgstart("Starting..."))
    }
  )
  expect_length(captured, 1L)
  expect_equal(captured[[1L]][["text"]], "Starting...")
  expect_equal(captured[[1L]][["level"]], "start")
})

test_that("msgdone() routes to the sink with level = 'done' and a caller", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      expect_silent(msgdone())
    }
  )
  expect_length(captured, 1L)
  expect_equal(captured[[1L]][["level"]], "done")
  # caller may be NA depending on test harness call stack — assert type only
  expect_true(
    is.character(captured[[1L]][["caller"]]) ||
      is.na(captured[[1L]][["caller"]])
  )
})

test_that("multiple msg variants accumulate as separate sink events in order", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      msg("one")
      msg0("two")
      msgstart("three")
      msgdone()
    }
  )
  expect_length(captured, 4L)
  expect_equal(
    vapply(captured, `[[`, character(1L), "level"),
    c("info", "info", "start", "done")
  )
  expect_equal(captured[[1L]][["text"]], "one")
  expect_equal(captured[[2L]][["text"]], "two")
  expect_equal(captured[[3L]][["text"]], "three")
})

test_that("verbosity = 0 short-circuits before reaching the sink", {
  captured <- list()
  with_msg_sink(
    function(m) captured[[length(captured) + 1L]] <<- m,
    {
      msg("ignored", verbosity = 0L)
      msg0("ignored", verbosity = 0L)
    }
  )
  expect_length(captured, 0L)
})

test_that("clearing the sink restores console output", {
  on.exit(set_msg_sink(NULL), add = TRUE)
  set_msg_sink(function(m) NULL)
  expect_silent(msg("under sink"))
  set_msg_sink(NULL)
  expect_message(msg("after clear"))
})


================================================
FILE: tests/testthat/test_strings.R
================================================
# test_strings.R
# ::rtemis::
# 2025 EDG rtemis.org

# repr_ls ----
x <- list(
  a = 1:5,
  b = letters[1:5],
  c = rnorm(5)
)
out <- repr_ls(x, title = "Test List")
test_that("repr_ls() works", {
  expect_true(is.character(out))
})

## Long list ----
x <- list(
  a = 1:100,
  b = letters[1:100],
  c = iris,
  d = sample(letters, 100, replace = TRUE),
  e = runif(100),
  f = setup_Preprocessor(),
  g = rpois(100, 2),
  h = rbinom(100, 10, 0.5),
  i = setup_PCA(),
  j = rnorm(100),
  k = rnorm(100),
  l = setup_LightCART()
)

test_that("repr_ls() handles long lists", {
  expect_true(is.character(repr_ls(x, limit = 5L)))
  expect_true(is.character(repr_ls(x, limit = -1L)))
})


================================================
FILE: tests/testthat/test_to_json.R
================================================
# test_to_json.R
# ::rtemis::
# 2026- EDG rtemis.org

skip_if_not_installed("jsonlite")

library(data.table)


# Data ----
n <- 100L
x <- rnormmat(n, 3L, seed = 2026L)
y <- x[, 1L] + x[, 2L] + rnorm(n)
datr <- data.table(x, y)


# Generic ----
test_that("to_json() is a registered S7 generic", {
  expect_true(inherits(to_json, "S7_generic"))
})


# Supervised (Regression) ----
mod_r_glm <- train(x = datr, algorithm = "glm")

test_that("to_json(Regression) returns a list with .class and core fields", {
  j <- to_json(mod_r_glm)
  expect_type(j, "list")
  expect_equal(j[[".class"]], "Regression")
  expect_equal(j[["type"]], "Regression")
  expect_true(is.character(j[["xnames"]]))
  expect_equal(j[["n_features"]], length(mod_r_glm@xnames))
})

test_that("to_json(Regression) recurses into nested S7 props with .class tags", {
  j <- to_json(mod_r_glm)
  expect_true(is.list(j[["metrics_training"]]))
  expect_true(".class" %in% names(j[["metrics_training"]]))
  expect_true(is.list(j[["execution_config"]]))
  expect_equal(j[["execution_config"]][[".class"]], "ExecutionConfig")
})

test_that("to_json(Regression) is JSON-serializable and round-trips", {
  j <- to_json(mod_r_glm)
  txt <- jsonlite::toJSON(j, auto_unbox = TRUE, na = "null", null = "null")
  expect_true(jsonlite::validate(txt))
  parsed <- jsonlite::fromJSON(txt, simplifyVector = FALSE)
  expect_equal(parsed[[".class"]], "Regression")
  expect_equal(parsed[["type"]], "Regression")
})

test_that("to_json(Regression) excludes model, raw vectors, session_info", {
  j <- to_json(mod_r_glm)
  expect_false("model" %in% names(j))
  expect_false("y_training" %in% names(j))
  expect_false("predicted_training" %in% names(j))
  expect_false("session_info" %in% names(j))
  expect_false("extra" %in% names(j))
})

test_that("to_json(Regression) drops NULL fields cleanly", {
  j <- to_json(mod_r_glm)
  expect_true(all(vapply(j, function(v) !is.null(v), logical(1L))))
})


# Supervised (Classification) ----
datc <- data.frame(iris[51:150, ])
datc$Species <- factor(datc$Species)
mod_c_glm <- train(x = datc, algorithm = "glm")

test_that("to_json(Classification) tags class and includes binclasspos", {
  j <- to_json(mod_c_glm)
  expect_equal(j[[".class"]], "Classification")
  expect_true("binclasspos" %in% names(j))
  expect_true(is.integer(j[["binclasspos"]]))
})


# SupervisedRes ----
resmod <- train(
  x = datr,
  algorithm = "glm",
  outer_resampling_config = setup_Resampler(n_resamples = 3L, type = "KFold")
)

test_that("to_json(RegressionRes) returns a list with .class and resample summary", {
  j <- to_json(resmod)
  expect_type(j, "list")
  expect_equal(j[[".class"]], "RegressionRes")
  expect_equal(j[["n_resamples"]], 3L)
  expect_true(is.list(j[["outer_resampler"]]))
  expect_true(is.list(j[["metrics_training"]]))
  expect_true(is.list(j[["metrics_test"]]))
})

test_that("to_json(RegressionRes) is JSON-serializable", {
  j <- to_json(resmod)
  txt <- jsonlite::toJSON(j, auto_unbox = TRUE, na = "null", null = "null")
  expect_true(jsonlite::validate(txt))
  parsed <- jsonlite::fromJSON(txt, simplifyVector = FALSE)
  expect_equal(parsed[[".class"]], "RegressionRes")
  expect_equal(parsed[["n_resamples"]], 3L)
})

test_that("to_json(RegressionRes) excludes models list (only summary count)", {
  j <- to_json(resmod)
  expect_false("models" %in% names(j))
})


# Default method ----
test_that("default to_json walks props and tags .class for arbitrary S7 objects", {
  exec <- setup_ExecutionConfig()
  j <- to_json(exec)
  expect_type(j, "list")
  expect_equal(j[[".class"]], "ExecutionConfig")
})


================================================
FILE: tests/testthat.R
================================================
library(rtemis)
library(testthat)

test_check("rtemis")