[
  {
    "path": ".Rbuildignore",
    "content": "^\\cache$\n^codemeta\\.json$\n^Meta$\n^doc$\n^.*\\.Rproj$\n^\\.Rproj\\.user$\n^README\\.Rmd$\n^Rplots.pdf$\n^README-.*\\.png$\n^CONDUCT\\.md$\n^SECURITY\\.md$\n^cran-comments\\.md$\n^CODE_OF_CONDUCT\\.md$\n^SUPPORT\\.md$\n^\\.github$\n^NEWS$\n^docs$\n^revdep$\npublication/*\n^codecov\\.yml$\n^\\.coveralls\\.yml$\n^\\.travis\\.yml$\n^_pkgdown\\.yml$\n^_pkgdown\\.yaml$\n^appveyor\\.yml$\n^.gitlab-ci\\.yml$\n^data-raw$\n^pkgdown$\n^\\.httr-oauth$\n^CRAN-RELEASE$\ntests\\^spelling\n^LICENSE\\.md$\n^\\.lintr$\n\\.code-workspace$\n^\\.circleci$\n^tests/manual$\n^revdep$\n^\\.covrignore$\n^\\.github/ISSUE_TEMPLATE$\n^paper.*$\nreferences.bib\n^API$\n^\\.pre-commit-config\\.yaml$\n^\\.github/workflows/R\\.yaml$\n^\\.github/workflows/pr-commands\\.yaml$\n^hextools/.\n^WIP/.\n^CRAN-SUBMISSION$\ndocs\n^.dev$\n^vignettes/s.\n^vignettes/t.\n^[\\.]?air\\.toml$\n^\\.vscode$\n^\\.git-blame-ignore-revs$\n"
  },
  {
    "path": ".dev/_BENCHMARK_RESHAPE.R",
    "content": "library(tidyr)\nlibrary(dplyr)\nlibrary(datawizard)\n\n### DATA_TO_LONG ==========================================\n\n\n# SLOW (5M rows)\n\nwide_data <- data.frame(replicate(5, rnorm(10)))\ntmp <- list()\nfor (i in 1:500000) {\n  tmp[[i]] <- wide_data\n}\n\ntmp <- data.table::rbindlist(tmp) |>\n  as_tibble()\n\nex1_l <- bench::mark(\n  old = old_data_to_long(tmp),\n  new = data_to_long(tmp),\n  tidyr = pivot_longer(tmp, cols = everything()),\n  iterations = 10\n)\n\n\nex2_l <- bench::mark(\n  old = relig_income %>%\n    old_data_to_long(-\"religion\", names_to = \"income\", values_to = \"count\"),\n  new = relig_income %>%\n    data_to_long(-\"religion\", names_to = \"income\", values_to = \"count\"),\n  tidyr = relig_income %>%\n    pivot_longer(!religion, names_to = \"income\", values_to = \"count\"),\n  iterations = 100\n)\n\nex3_l <- bench::mark(\n  old = billboard %>%\n    old_data_to_long(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\"\n    ),\n  new = billboard %>%\n    data_to_long(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\"\n    ),\n  tidyr = billboard %>%\n    pivot_longer(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\"\n    ),\n  iterations = 50\n)\n\nex4_l <- bench::mark(\n  old = who |>\n    old_data_to_long(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_sep = \"_\",\n      values_to = \"count\"\n    ),\n  new = who |>\n    data_to_long(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_sep = \"_\",\n      values_to = \"count\"\n    ),\n  tidyr = who |>\n    pivot_longer(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_sep = \"_\",\n      values_to = \"count\"\n    ),\n  iterations = 10\n)\n\nex5_l <- bench::mark(\n  old = who |>\n    old_data_to_long(\n      cols = 5:60,\n      
names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_pattern = \"new_?(.*)_(.)(.*)\",\n      values_to = \"count\"\n    ),\n  new = who |>\n    data_to_long(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_pattern = \"new_?(.*)_(.)(.*)\",\n      values_to = \"count\"\n    ),\n  tidyr = who |>\n    pivot_longer(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_pattern = \"new_?(.*)_(.)(.*)\",\n      values_to = \"count\"\n    ),\n  iterations = 10\n)\n\n\n\n### DATA_TO_WIDE ==========================================\n\nex1_w <- bench::mark(\n  old = fish_encounters %>%\n    old_data_to_wide(\n      names_from = \"station\",\n      values_from = \"seen\",\n      values_fill = 0\n    ),\n  new = fish_encounters %>%\n     data_to_wide(\n      names_from = \"station\",\n      values_from = \"seen\",\n      values_fill = 0\n    ),\n  tidyr = fish_encounters %>%\n    pivot_wider(\n      names_from = \"station\",\n      values_from = \"seen\",\n      values_fill = 0\n    ),\n  iterations = 100\n)\n\n\n\n\nproduction <- expand_grid(\n  product = letters,\n  country = paste0(letters, \"I\"),\n  year = 2000:2025\n) %>%\n  mutate(production = rnorm(nrow(.)))\n\n\nex2_w <- bench::mark(\n  old = production %>%\n    old_data_to_wide(\n      names_from = c(\"product\", \"country\"),\n      values_from = \"production\"\n    ),\n  new = production %>%\n     data_to_wide(\n      names_from = c(\"product\", \"country\"),\n      values_from = \"production\"\n    ),\n  tidyr = production %>%\n    pivot_wider(\n      names_from = c(product, country),\n      values_from = production\n    ),\n  iterations = 10\n)\n\n\nex3_w <- bench::mark(\n  old = production %>%\n    old_data_to_wide(\n      names_from = c(\"product\", \"country\"),\n      values_from = \"production\",\n      names_glue = \"prod_{product}_{country}\"\n    ),\n  new = production %>%\n     data_to_wide(\n      names_from = 
c(\"product\", \"country\"),\n      values_from = \"production\",\n      names_glue = \"prod_{product}_{country}\"\n    ),\n  tidyr = production %>%\n    pivot_wider(\n      names_from = c(product, country),\n      values_from = production,\n      names_glue = \"prod_{product}_{country}\"\n    ),\n  iterations = 10\n)\n\n\ntmp <- list()\nfor (i in 1:1000) {\n  tmp[[i]] <- us_rent_income\n}\n\ntmp <- data.table::rbindlist(tmp) |>\n  as_tibble()\ntmp$GEOID <- rep(1:52000, each = 2)\ntmp$NAME <- as.character(rep(1:52000, each = 2))\n\nex4_w <- bench::mark(\n  old = tmp %>%\n    old_data_to_wide(\n      names_from = \"variable\",\n      values_from = c(\"estimate\", \"moe\")\n    ),\n  new = tmp %>%\n     data_to_wide(\n      names_from = \"variable\",\n      values_from = c(\"estimate\", \"moe\")\n    ),\n  tidyr = tmp %>%\n    pivot_wider(\n      names_from = \"variable\",\n      values_from = c(\"estimate\", \"moe\")\n    ),\n  iterations = 10\n)\n\n# SLOW (1M rows) ============\n\nset.seed(123)\ncontacts <- tibble(\n  id = rep(1:500000, each = 2),\n  field = rep(c(\"a\", \"b\"), 500000),\n  value = sample(letters, 1000000, replace = TRUE)\n)\n\nex5_w <- bench::mark(\n  old = contacts %>%\n    old_data_to_wide(names_from = \"field\", values_from = \"value\"),\n  new = contacts %>%\n     data_to_wide(names_from = \"field\", values_from = \"value\"),\n  tidyr = contacts %>%\n    tidyr::pivot_wider(names_from = field, values_from = value),\n  iterations = 1\n)\n\n\n# SLOWER (10M rows) ============\n\nset.seed(123)\ncontacts <- tibble(\n  id = rep(1:5000000, each = 2),\n  field = rep(c(\"a\", \"b\"), 5000000),\n  value = sample(letters, 10000000, replace = TRUE)\n)\n\nex6_w <- bench::mark(\n  old = contacts %>%\n    old_data_to_wide(names_from = \"field\", values_from = \"value\"),\n  new = contacts %>%\n     data_to_wide(names_from = \"field\", values_from = \"value\"),\n  tidyr = contacts %>%\n    tidyr::pivot_wider(names_from = field, values_from = value),\n  
iterations = 1\n)\n\n\n\n\nreprex:::prex({\n  ### DATA_TO_LONG ==========================================\n\n  ex1_l\n  ex2_l\n  ex3_l\n  ex4_l\n  ex5_l\n\n  ### DATA_TO_WIDE ==========================================\n\n  ex1_w\n  ex2_w\n  ex3_w\n  ex4_w\n  ex5_w\n  ex6_w\n})\n"
  },
  {
    "path": ".dev/html5.R",
    "content": "Sys.setenv(\"_R_CHECK_RD_VALIDATE_RD2HTML_\" = \"true\")\nSys.setenv(\"_R_CHECK_CRAN_INCOMING_REMOTE_\" = \"false\")\nSys.setenv(\"_R_CHECK_CRAN_INCOMING_\" = \"false\")\n\nrcmdcheck::rcmdcheck(\n  args = c(\"--as-cran\", \"--no-codoc\", \"--no-examples\", \"--no-tests\", \"--no-vignettes\", \"--no-build-vignettes\", \"--ignore-vignettes\", \"--no-install\"),\n  build_args = c(\"--no-build-vignettes\"),\n  error_on = \"note\"\n)\n"
  },
  {
    "path": ".dev/revdepcheck.R",
    "content": "library(revdepcheck)\n\nrevdep_check(num_workers = 4)\nrevdep_report()\nrevdep_reset()\n"
  },
  {
    "path": ".dev/test-value_at.R",
    "content": "test_that(\"value_at\", {\n  data(efc, package = \"datawizard\")\n  expect_equal(value_at(efc$e42dep, 5), 4, ignore_attr = TRUE)\n  expect_equal(value_at(efc$c12hour, 4), NA_real_, ignore_attr = TRUE)\n  expect_equal(value_at(efc$c12hour, 4, remove_na = TRUE), 168, ignore_attr = TRUE)\n  expect_equal(value_at(efc$c12hour, 5:7), efc$c12hour[5:7], ignore_attr = TRUE)\n  expect_equal(value_at(efc$e42dep, 123456, default = 55), 55, ignore_attr = TRUE)\n  expect_null(value_at(efc$e42dep, 123456))\n  expect_null(value_at(efc$e42dep, NULL))\n  expect_error(value_at(efc$e42dep, NA), regex = \"`position` can't\")\n  expect_error(value_at(efc$e42dep, c(3, NA)), regex = \"`position` can't\")\n})\n"
  },
  {
    "path": ".dev/value_at.R",
    "content": "#' @title Find the value(s) at a specific position in a variable\n#' @name value_at\n#'\n#' @description This function can be used to extract one or more values at a\n#' specific position in a variable.\n#'\n#' @param x A vector or factor.\n#' @param position An integer or a vector of integers, indicating the position(s)\n#' of the value(s) to be returned. Negative values are counted from the end of\n#' the vector. If `NA`, an error is thrown.\n#' @param remove_na Logical, if `TRUE`, missing values are removed before\n#' computing the position. If `FALSE`, missing values are included in the\n#' computation.\n#' @param default The value to be returned if the position is out of range.\n#'\n#' @seealso `data_summary()` to use `value_at()` inside a `data_summary()` call.\n#'\n#' @return A vector with the value(s) at the specified position(s).\n#'\n#' @examples\n#' data(mtcars)\n#' # 5th value\n#' value_at(mtcars$mpg, 5)\n#' # last value\n#' value_at(mtcars$mpg, -1)\n#' # out of range, return default\n#' value_at(mtcars$mpg, 150)\n#' # return 2nd and fifth value\n#' value_at(mtcars$mpg, c(2, 5))\n#' @export\nvalue_at <- function(x, position = 1, default = NULL, remove_na = FALSE) {\n  if (remove_na) {\n    x <- x[!is.na(x)]\n  }\n  n <- length(x)\n  unlist(lapply(position, .values_at, x = x, n = n, default = default), use.names = FALSE)\n}\n\n# helper ----\n\n.values_at <- function(x, position, n, default) {\n  if (is.na(position)) {\n    insight::format_error(\"`position` can't be `NA`.\")\n  }\n  if (position < 0L) {\n    position <- position + n + 1\n  }\n  if (position <= 0 || position > n) {\n    return(default)\n  }\n  x[position]\n}\n"
  },
  {
    "path": ".git-blame-ignore-revs",
    "content": "# Air formatting\n5bd245e0bc12d2eecbcfa480a231b6df3ab7d684"
  },
  {
    "path": ".github/.gitignore",
    "content": "*.html\n"
  },
  {
    "path": ".github/CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participation in our\ncommunity a harassment-free experience for everyone, regardless of age, body\nsize, visible or invisible disability, ethnicity, sex characteristics, gender\nidentity and expression, level of experience, education, socio-economic status,\nnationality, personal appearance, race, caste, color, religion, or sexual\nidentity and orientation.\n\nWe pledge to act and interact in ways that contribute to an open, welcoming,\ndiverse, inclusive, and healthy community.\n\n## Our Standards\n\nExamples of behavior that contributes to a positive environment for our\ncommunity include:\n\n* Demonstrating empathy and kindness toward other people\n* Being respectful of differing opinions, viewpoints, and experiences\n* Giving and gracefully accepting constructive feedback\n* Accepting responsibility and apologizing to those affected by our mistakes,\n  and learning from the experience\n* Focusing on what is best not just for us as individuals, but for the overall\n  community\n\nExamples of unacceptable behavior include:\n\n* The use of sexualized language or imagery, and sexual attention or advances of\n  any kind\n* Trolling, insulting or derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or email address,\n  without their explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Enforcement Responsibilities\n\nCommunity leaders are responsible for clarifying and enforcing our standards of\nacceptable behavior and will take appropriate and fair corrective action in\nresponse to any behavior that they deem inappropriate, threatening, offensive,\nor harmful.\n\nCommunity leaders have the right and responsibility to remove, edit, or reject\ncomments, commits, code, wiki 
edits, issues, and other contributions that are\nnot aligned to this Code of Conduct, and will communicate reasons for moderation\ndecisions when appropriate.\n\n## Scope\n\nThis Code of Conduct applies within all community spaces, and also applies when\nan individual is officially representing the community in public spaces.\nExamples of representing our community include using an official e-mail address,\nposting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported to the community leaders responsible for enforcement at patilindrajeet.science@gmail.com. \nAll complaints will be reviewed and investigated promptly and fairly.\n\nAll community leaders are obligated to respect the privacy and security of the\nreporter of any incident.\n\n## Enforcement Guidelines\n\nCommunity leaders will follow these Community Impact Guidelines in determining\nthe consequences for any action they deem in violation of this Code of Conduct:\n\n### 1. Correction\n\n**Community Impact**: Use of inappropriate language or other behavior deemed\nunprofessional or unwelcome in the community.\n\n**Consequence**: A private, written warning from community leaders, providing\nclarity around the nature of the violation and an explanation of why the\nbehavior was inappropriate. A public apology may be requested.\n\n### 2. Warning\n\n**Community Impact**: A violation through a single incident or series of\nactions.\n\n**Consequence**: A warning with consequences for continued behavior. No\ninteraction with the people involved, including unsolicited interaction with\nthose enforcing the Code of Conduct, for a specified period of time. This\nincludes avoiding interactions in community spaces as well as external channels\nlike social media. Violating these terms may lead to a temporary or permanent\nban.\n\n### 3. 
Temporary Ban\n\n**Community Impact**: A serious violation of community standards, including\nsustained inappropriate behavior.\n\n**Consequence**: A temporary ban from any sort of interaction or public\ncommunication with the community for a specified period of time. No public or\nprivate interaction with the people involved, including unsolicited interaction\nwith those enforcing the Code of Conduct, is allowed during this period.\nViolating these terms may lead to a permanent ban.\n\n### 4. Permanent Ban\n\n**Community Impact**: Demonstrating a pattern of violation of community\nstandards, including sustained inappropriate behavior, harassment of an\nindividual, or aggression toward or disparagement of classes of individuals.\n\n**Consequence**: A permanent ban from any sort of public interaction within the\ncommunity.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage],\nversion 2.1, available at\n<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.\n\nCommunity Impact Guidelines were inspired by\n[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion).\n\nFor answers to common questions about this code of conduct, see the FAQ at\n<https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.\n\n[homepage]: https://www.contributor-covenant.org\n"
  },
  {
    "path": ".github/CONTRIBUTING.md",
    "content": "# Contributing to datawizard\n\nThis outlines how to propose a change to **datawizard**. \n\n## Fixing typos\n\nSmall typos or grammatical errors in documentation may be edited directly using the GitHub web interface, so long as the changes are made in the _source_ file. If you want to fix typos in the documentation, please edit the related `.R` file in the `R/` folder. Do _not_ edit an `.Rd` file in `man/`.\n\n## Filing an issue\n\nThe easiest way to propose a change or new feature is to file an issue. If you've found a\nbug, you may also create an associated issue. If possible, try to illustrate your proposal or the bug with a minimal [reproducible example](https://www.tidyverse.org/help/#reprex).\n\n## Pull requests\n\n*  Please create a Git branch for each pull request (PR).\n*  Your contributed code should roughly follow the [R style guide](http://style.tidyverse.org), but in particular our [**easystats convention of code-style**](https://github.com/easystats/easystats#convention-of-code-style).\n*  datawizard uses [roxygen2](https://cran.r-project.org/package=roxygen2), with\n[Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html),\nfor documentation.\n*  datawizard uses [testthat](https://cran.r-project.org/package=testthat). Adding tests to the PR makes it easier for me to merge your PR into the code base.\n*  If your PR is a user-visible change, you may add a bullet to the top of `NEWS.md` describing the changes made. You may optionally add your GitHub username, and links to relevant issue(s)/PR(s).\n\n## Code of Conduct\n\nPlease note that this project is released with a [Contributor Code of Conduct](https://easystats.github.io/datawizard/CODE_OF_CONDUCT.html). By participating in this project you agree to\nabide by its terms.\n"
  },
  {
    "path": ".github/FUNDING.yml",
    "content": "# These are supported funding model platforms\n\ngithub: easystats\n"
  },
  {
    "path": ".github/SUPPORT.md",
    "content": "# Getting help with `{datawizard}`\n\nThanks for using `{datawizard}`. Before filing an issue, there are a few places\nto explore and pieces to put together to make the process as smooth as possible.\n\nStart by making a minimal **repr**oducible **ex**ample using the \n[reprex](http://reprex.tidyverse.org/) package. If you haven't heard of or used \nreprex before, you're in for a treat! Seriously, reprex will make all of your \nR-question-asking endeavors easier (which is a pretty insane ROI for the five to \nten minutes it'll take you to learn what it's all about). For additional reprex\npointers, check out the [Get help!](https://www.tidyverse.org/help/) resource\nused by the tidyverse team.\n\nArmed with your reprex, the next step is to figure out where to ask: \n\n  * If it's a question: start with StackOverflow. There are more people there to answer questions.\n  * If it's a bug: you're in the right place, file an issue.\n  * If you're not sure: let the community help you figure it out! If your \n    problem _is_ a bug or a feature request, you can easily return here and \n    report it. \n\nBefore opening a new issue, be sure to [search issues and pull requests](https://github.com/easystats/datawizard/issues) to make sure the \nbug hasn't been reported and/or already fixed in the development version. By \ndefault, the search will be pre-populated with `is:issue is:open`. You can \n[edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/) \n(e.g. `is:pr`, `is:closed`) as needed. For example, you'd simply\nremove `is:open` to search _all_ issues in the repo, open or closed.\n\nThanks for your help!\n"
  },
  {
    "path": ".github/dependabot.yaml",
    "content": "version: 2\n\nupdates:\n  # Keep dependencies for GitHub Actions up-to-date\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n"
  },
  {
    "path": ".github/workflows/R-CMD-check-hard.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\n#\n# NOTE: This workflow only directly installs \"hard\" dependencies, i.e. Depends,\n# Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never\n# installed, with the exception of testthat, knitr, and rmarkdown. The cache is\n# never used to avoid accidentally restoring a cache containing a suggested\n# dependency.\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: R-CMD-check-hard\n\njobs:\n  R-CMD-check-hard:\n    uses: easystats/workflows/.github/workflows/R-CMD-check-hard.yaml@main\n"
  },
  {
    "path": ".github/workflows/R-CMD-check.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\n#\n# NOTE: This workflow is overkill for most R packages and\n# check-standard.yaml is likely a better choice.\n# usethis::use_github_action(\"check-standard\") will install it.\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: R-CMD-check\n\njobs:\n  R-CMD-check:\n    uses: easystats/workflows/.github/workflows/R-CMD-check.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-all-examples.yaml",
    "content": "# Make sure all examples run successfully, even the ones that are not supposed\n# to be run or tested on CRAN machines by default.\n#\n# The examples that fail should use\n#  - `if (FALSE) { ... }` (if example is included only for illustrative purposes)\n#  - `try({ ... })` (if the intent is to show the error)\n#\n# This workflow helps find such failing examples that need to be modified.\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-all-examples\n\njobs:\n  check-all-examples:\n    uses: easystats/workflows/.github/workflows/check-all-examples.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-link-rot.yaml",
    "content": "on:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-link-rot\n\njobs:\n  check-link-rot:\n    uses: easystats/workflows/.github/workflows/check-link-rot.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-random-test-order.yaml",
    "content": "# Run tests in random order\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-random-test-order\n\njobs:\n  check-random-test-order:\n    uses: easystats/workflows/.github/workflows/check-random-test-order.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-readme.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\n\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-readme\n\njobs:\n  check-readme:\n    uses: easystats/workflows/.github/workflows/check-readme.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-spelling.yaml",
    "content": "on:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-spelling\n\njobs:\n  check-spelling:\n    uses: easystats/workflows/.github/workflows/check-spelling.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-styling.yaml",
    "content": "on:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-styling\n\njobs:\n  check-styling:\n    uses: easystats/workflows/.github/workflows/check-styling.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-test-warnings.yaml",
    "content": "# Running tests with options(warn = 2) to fail on test warnings\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-test-warnings\n\njobs:\n  check-test-warnings:\n    uses: easystats/workflows/.github/workflows/check-test-warnings.yaml@main\n"
  },
  {
    "path": ".github/workflows/check-vignette-warnings.yaml",
    "content": "# Running tests with options(warn = 2) to fail on test warnings\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: check-vignette-warnings\n\njobs:\n  check-vignette-warnings:\n    uses: easystats/workflows/.github/workflows/check-vignette-warnings.yaml@main\n"
  },
  {
    "path": ".github/workflows/html-5-check.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: html-5-check\n\njobs:\n  html-5-check:\n    uses: easystats/workflows/.github/workflows/html-5-check.yaml@main\n"
  },
  {
    "path": ".github/workflows/lint-changed-files.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  pull_request:\n    branches: [main, master]\n\nname: lint-changed-files\n\njobs:\n  lint-changed-files:\n    uses: easystats/workflows/.github/workflows/lint-changed-files.yaml@main\n"
  },
  {
    "path": ".github/workflows/lint.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: lint\n\njobs:\n  lint:\n    uses: easystats/workflows/.github/workflows/lint.yaml@main\n"
  },
  {
    "path": ".github/workflows/pkgdown-no-suggests.yaml",
    "content": "on:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: pkgdown-no-suggests\n\njobs:\n  pkgdown-no-suggests:\n    uses: easystats/workflows/.github/workflows/pkgdown-no-suggests.yaml@main\n"
  },
  {
    "path": ".github/workflows/pkgdown.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n  release:\n    types: [published]\n  workflow_dispatch:\n\nname: pkgdown\n\njobs:\n  pkgdown:\n    uses: easystats/workflows/.github/workflows/pkgdown.yaml@main\n"
  },
  {
    "path": ".github/workflows/test-coverage-examples.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: test-coverage-examples\n\njobs:\n  test-coverage-examples:\n    uses: easystats/workflows/.github/workflows/test-coverage-examples.yaml@main\n"
  },
  {
    "path": ".github/workflows/test-coverage.yaml",
    "content": "# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples\n# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help\non:\n  push:\n    branches: [main, master]\n  pull_request:\n    branches: [main, master]\n\nname: test-coverage\n\njobs:\n  test-coverage:\n    uses: easystats/workflows/.github/workflows/test-coverage.yaml@main\n"
  },
  {
    "path": ".github/workflows/update-to-latest-easystats.yaml",
    "content": "on:\n  schedule:\n    # Check for dependency updates once a month\n    - cron: \"0 0 1 * *\"\n\nname: update-to-latest-easystats\n\njobs:\n  update-to-latest-easystats:\n    uses: easystats/workflows/.github/workflows/update-to-latest-easystats.yaml@main\n"
  },
  {
    "path": ".gitignore",
    "content": "# History files\n.Rhistory\n.Rapp.history\n\n# Session Data files\n.RData\n\n# Example code in package build process\n*-Ex.R\n\n# Output files from R CMD build\n/*.tar.gz\n\n# Output files from R CMD check\n/*.Rcheck/\n\n# RStudio files\n.Rproj.user/\n\n# produced vignettes\nvignettes/*.html\nvignettes/*.pdf\n\n# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3\n.httr-oauth\n\n# knitr and R markdown default cache directories\n/*_cache/\n/cache/\n\n# Temporary files created by R markdown\n*.utf8.md\n*.knit.md\n\n# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html\nrsconnect/\n\n=========================\n# Operating System Files\n# OSX\n.DS_Store\n.AppleDouble\n.LSOverride\n\n# Thumbnails\n._*\n\n# Files that might appear in the root of a volume\n.DocumentRevisions-V100\n.fseventsd\n.Spotlight-V100\n.TemporaryItems\n.Trashes\n.VolumeIcon.icns\n\n# Directories potentially created on remote AFP share\n.AppleDB\n.AppleDesktop\nNetwork Trash Folder\nTemporary Items\n.apdisk\ndocs\ninst/doc\nCRAN-SUBMISSION\n"
  },
  {
    "path": ".lintr",
    "content": "linters: all_linters(\n    absolute_path_linter = NULL,\n    assignment_linter = NULL,\n    commented_code_linter = NULL,\n    cyclocomp_linter(25L),\n    if_not_else_linter(exceptions = character(0L)),\n    implicit_integer_linter = NULL,\n    library_call_linter = NULL,\n    line_length_linter(120L),\n    namespace_linter = NULL,\n    nonportable_path_linter = NULL,\n    object_name_linter = NULL,\n    object_length_linter(50L),\n    object_usage_linter = NULL,\n    todo_comment_linter = NULL,\n    string_boundary_linter = NULL,\n    strings_as_factors_linter = NULL,\n    undesirable_function_linter = NULL,\n    undesirable_operator_linter = NULL,\n    unnecessary_concatenation_linter(allow_single_expression = FALSE),\n    unused_import_linter = NULL\n  )\n"
  },
  {
    "path": "DESCRIPTION",
    "content": "Type: Package\nPackage: datawizard\nTitle: Easy Data Wrangling and Statistical Transformations\nVersion: 1.3.1\nAuthors@R: c(\n    person(\"Indrajeet\", \"Patil\", , \"patilindrajeet.science@gmail.com\", role = \"aut\",\n           comment = c(ORCID = \"0000-0003-1995-6531\")),\n    person(\"Etienne\", \"Bacher\", , \"etienne.bacher@protonmail.com\", role = c(\"aut\", \"cre\"),\n           comment = c(ORCID = \"0000-0002-9271-5075\")),\n    person(\"Dominique\", \"Makowski\", , \"dom.makowski@gmail.com\", role = \"aut\",\n           comment = c(ORCID = \"0000-0001-5375-9967\")),\n    person(\"Daniel\", \"Lüdecke\", , \"d.luedecke@uke.de\", role = \"aut\",\n           comment = c(ORCID = \"0000-0002-8895-3206\")),\n    person(\"Mattan S.\", \"Ben-Shachar\", , \"matanshm@post.bgu.ac.il\", role = \"aut\",\n           comment = c(ORCID = \"0000-0002-4287-4801\")),\n    person(\"Brenton M.\", \"Wiernik\", , \"brenton@wiernik.org\", role = \"aut\",\n           comment = c(ORCID = \"0000-0001-9560-6336\")),\n    person(\"Rémi\", \"Thériault\", , \"remi.theriault@mail.mcgill.ca\", role = \"ctb\",\n           comment = c(ORCID = \"0000-0003-4315-6788\")),\n    person(\"Thomas J.\", \"Faulkenberry\", , \"faulkenberry@tarleton.edu\", role = \"rev\"),\n    person(\"Robert\", \"Garrett\", , \"rcg4@illinois.edu\", role = \"rev\")\n  )\nMaintainer: Etienne Bacher <etienne.bacher@protonmail.com>\nDescription: A lightweight package to assist in key steps involved in any data\n    analysis workflow: (1) wrangling the raw data to get it in the needed form,\n    (2) applying preprocessing steps and statistical transformations, and\n    (3) compute statistical summaries of data properties and distributions.\n    It is also the data wrangling backend for packages in 'easystats' ecosystem.\n    References: Patil et al. 
(2022) <doi:10.21105/joss.04684>.\nLicense: MIT + file LICENSE\nURL: https://easystats.github.io/datawizard/\nBugReports: https://github.com/easystats/datawizard/issues\nDepends:\n    R (>= 4.0)\nImports:\n    insight (>= 1.4.6),\n    stats,\n    utils\nSuggests:\n    bayestestR,\n    boot,\n    BH,\n    brms,\n    curl,\n    data.table,\n    dplyr (>= 1.1),\n    effectsize,\n    emmeans,\n    fixest,\n    gamm4,\n    ggplot2 (>= 3.5.0),\n    gt,\n    haven,\n    httr,\n    knitr,\n    lme4,\n    mediation,\n    modelbased,\n    nanoparquet,\n    openssl,\n    parameters (>= 0.21.7),\n    performance (>= 0.14.0),\n    poorman (>= 0.2.7),\n    psych,\n    readxl,\n    readr,\n    rio,\n    rmarkdown,\n    rstanarm,\n    see,\n    testthat (>= 3.2.1),\n    tibble,\n    tidyr,\n    tinytable (>= 0.13.0),\n    withr\nVignetteBuilder:\n    knitr\nEncoding: UTF-8\nLanguage: en-US\nRoxygen: list(markdown = TRUE)\nRoxygenNote: 7.3.3\nConfig/testthat/edition: 3\nConfig/testthat/parallel: true\nConfig/Needs/website: easystats/easystatstemplate\n"
  },
  {
    "path": "LICENSE",
    "content": "YEAR: 2023\nCOPYRIGHT HOLDER: datawizard authors\n"
  },
  {
    "path": "LICENSE.md",
    "content": "# MIT License\n\nCopyright (c) 2023 datawizard authors\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "NAMESPACE",
    "content": "# Generated by roxygen2: do not edit by hand\n\nS3method(as.data.frame,datawizard_crosstabs)\nS3method(as.data.frame,datawizard_tables)\nS3method(as.double,parameters_kurtosis)\nS3method(as.double,parameters_skewness)\nS3method(as.double,parameters_smoothness)\nS3method(as.numeric,parameters_kurtosis)\nS3method(as.numeric,parameters_skewness)\nS3method(as.numeric,parameters_smoothness)\nS3method(as.prop.table,datawizard_crosstab)\nS3method(as.prop.table,datawizard_crosstabs)\nS3method(as.table,datawizard_crosstab)\nS3method(as.table,datawizard_crosstabs)\nS3method(as.table,datawizard_table)\nS3method(as.table,datawizard_tables)\nS3method(assign_labels,character)\nS3method(assign_labels,data.frame)\nS3method(assign_labels,default)\nS3method(assign_labels,factor)\nS3method(assign_labels,numeric)\nS3method(categorize,data.frame)\nS3method(categorize,default)\nS3method(categorize,factor)\nS3method(categorize,grouped_df)\nS3method(categorize,numeric)\nS3method(center,AsIs)\nS3method(center,Date)\nS3method(center,character)\nS3method(center,data.frame)\nS3method(center,default)\nS3method(center,factor)\nS3method(center,grouped_df)\nS3method(center,logical)\nS3method(center,numeric)\nS3method(coef_var,default)\nS3method(coef_var,numeric)\nS3method(convert_na_to,character)\nS3method(convert_na_to,data.frame)\nS3method(convert_na_to,default)\nS3method(convert_na_to,factor)\nS3method(convert_na_to,numeric)\nS3method(convert_to_na,Date)\nS3method(convert_to_na,character)\nS3method(convert_to_na,data.frame)\nS3method(convert_to_na,default)\nS3method(convert_to_na,factor)\nS3method(convert_to_na,logical)\nS3method(convert_to_na,numeric)\nS3method(data_arrange,default)\nS3method(data_arrange,grouped_df)\nS3method(data_duplicated,data.frame)\nS3method(data_duplicated,grouped_df)\nS3method(data_extract,data.frame)\nS3method(data_filter,data.frame)\nS3method(data_filter,grouped_df)\nS3method(data_merge,data.frame)\nS3method(data_merge,list)\nS3method(data_modify,data
.frame)\nS3method(data_modify,default)\nS3method(data_modify,grouped_df)\nS3method(data_peek,data.frame)\nS3method(data_summary,data.frame)\nS3method(data_summary,default)\nS3method(data_summary,grouped_df)\nS3method(data_summary,matrix)\nS3method(data_tabulate,data.frame)\nS3method(data_tabulate,default)\nS3method(data_tabulate,grouped_df)\nS3method(data_unique,data.frame)\nS3method(data_unique,grouped_df)\nS3method(describe_distribution,character)\nS3method(describe_distribution,data.frame)\nS3method(describe_distribution,default)\nS3method(describe_distribution,factor)\nS3method(describe_distribution,grouped_df)\nS3method(describe_distribution,list)\nS3method(describe_distribution,numeric)\nS3method(display,data_codebook)\nS3method(display,datawizard_crosstab)\nS3method(display,datawizard_crosstabs)\nS3method(display,datawizard_table)\nS3method(display,datawizard_tables)\nS3method(display,parameters_distribution)\nS3method(format,data_codebook)\nS3method(format,datawizard_crosstab)\nS3method(format,datawizard_table)\nS3method(format,dw_data_peek)\nS3method(format,dw_groupmeans)\nS3method(format,parameters_distribution)\nS3method(kurtosis,data.frame)\nS3method(kurtosis,default)\nS3method(kurtosis,matrix)\nS3method(kurtosis,numeric)\nS3method(labels_to_levels,data.frame)\nS3method(labels_to_levels,default)\nS3method(labels_to_levels,factor)\nS3method(makepredictcall,dw_transformer)\nS3method(means_by_group,data.frame)\nS3method(means_by_group,default)\nS3method(means_by_group,numeric)\nS3method(normalize,data.frame)\nS3method(normalize,factor)\nS3method(normalize,grouped_df)\nS3method(normalize,matrix)\nS3method(normalize,numeric)\nS3method(plot,parameters_distribution)\nS3method(plot,visualisation_recipe)\nS3method(print,data_codebook)\nS3method(print,data_seek)\nS3method(print,datawizard_crosstab)\nS3method(print,datawizard_crosstabs)\nS3method(print,datawizard_table)\nS3method(print,datawizard_tables)\nS3method(print,dw_data_peek)\nS3method(print,dw_data_summary
)\nS3method(print,dw_groupmeans)\nS3method(print,dw_groupmeans_list)\nS3method(print,dw_transformer)\nS3method(print,parameters_distribution)\nS3method(print,parameters_kurtosis)\nS3method(print,parameters_skewness)\nS3method(print,visualisation_recipe)\nS3method(print_html,data_codebook)\nS3method(print_html,datawizard_crosstab)\nS3method(print_html,datawizard_crosstabs)\nS3method(print_html,datawizard_table)\nS3method(print_html,datawizard_tables)\nS3method(print_html,dw_data_peek)\nS3method(print_html,parameters_distribution)\nS3method(print_md,data_codebook)\nS3method(print_md,datawizard_crosstab)\nS3method(print_md,datawizard_crosstabs)\nS3method(print_md,datawizard_table)\nS3method(print_md,datawizard_tables)\nS3method(print_md,dw_data_peek)\nS3method(print_md,parameters_distribution)\nS3method(ranktransform,data.frame)\nS3method(ranktransform,factor)\nS3method(ranktransform,grouped_df)\nS3method(ranktransform,numeric)\nS3method(recode_values,character)\nS3method(recode_values,data.frame)\nS3method(recode_values,default)\nS3method(recode_values,factor)\nS3method(recode_values,numeric)\nS3method(replace_nan_inf,data.frame)\nS3method(replace_nan_inf,default)\nS3method(rescale,data.frame)\nS3method(rescale,default)\nS3method(rescale,grouped_df)\nS3method(rescale,numeric)\nS3method(reverse,data.frame)\nS3method(reverse,default)\nS3method(reverse,factor)\nS3method(reverse,grouped_df)\nS3method(reverse,numeric)\nS3method(rowid_as_column,default)\nS3method(rowid_as_column,grouped_df)\nS3method(skewness,data.frame)\nS3method(skewness,default)\nS3method(skewness,matrix)\nS3method(skewness,numeric)\nS3method(slide,data.frame)\nS3method(slide,default)\nS3method(slide,numeric)\nS3method(smoothness,data.frame)\nS3method(smoothness,default)\nS3method(smoothness,numeric)\nS3method(standardize,AsIs)\nS3method(standardize,Date)\nS3method(standardize,Surv)\nS3method(standardize,bcplm)\nS3method(standardize,biglm)\nS3method(standardize,brmsfit)\nS3method(standardize,character)\n
S3method(standardize,clm2)\nS3method(standardize,data.frame)\nS3method(standardize,datagrid)\nS3method(standardize,default)\nS3method(standardize,double)\nS3method(standardize,factor)\nS3method(standardize,fixest)\nS3method(standardize,grouped_df)\nS3method(standardize,integer)\nS3method(standardize,logical)\nS3method(standardize,matrix)\nS3method(standardize,mediate)\nS3method(standardize,mixor)\nS3method(standardize,numeric)\nS3method(standardize,visualisation_matrix)\nS3method(standardize,wbgee)\nS3method(standardize,wbm)\nS3method(summary,parameters_kurtosis)\nS3method(summary,parameters_skewness)\nS3method(to_factor,Date)\nS3method(to_factor,character)\nS3method(to_factor,data.frame)\nS3method(to_factor,default)\nS3method(to_factor,double)\nS3method(to_factor,factor)\nS3method(to_factor,haven_labelled)\nS3method(to_factor,logical)\nS3method(to_factor,numeric)\nS3method(to_numeric,Date)\nS3method(to_numeric,POSIXct)\nS3method(to_numeric,POSIXlt)\nS3method(to_numeric,POSIXt)\nS3method(to_numeric,character)\nS3method(to_numeric,data.frame)\nS3method(to_numeric,default)\nS3method(to_numeric,double)\nS3method(to_numeric,factor)\nS3method(to_numeric,haven_labelled)\nS3method(to_numeric,logical)\nS3method(to_numeric,numeric)\nS3method(unnormalize,data.frame)\nS3method(unnormalize,default)\nS3method(unnormalize,grouped_df)\nS3method(unnormalize,numeric)\nS3method(unstandardize,array)\nS3method(unstandardize,character)\nS3method(unstandardize,data.frame)\nS3method(unstandardize,datagrid)\nS3method(unstandardize,factor)\nS3method(unstandardize,grouped_df)\nS3method(unstandardize,matrix)\nS3method(unstandardize,numeric)\nS3method(unstandardize,visualisation_matrix)\nS3method(winsorize,character)\nS3method(winsorize,data.frame)\nS3method(winsorize,factor)\nS3method(winsorize,logical)\nS3method(winsorize,numeric)\nexport(adjust)\nexport(as.prop.table)\nexport(assign_labels)\nexport(categorize)\nexport(center)\nexport(centre)\nexport(change_scale)\nexport(coef_var)\nexport(c
oerce_to_numeric)\nexport(colnames_to_row)\nexport(column_as_rownames)\nexport(contr.deviation)\nexport(convert_na_to)\nexport(convert_to_na)\nexport(data_addprefix)\nexport(data_addsuffix)\nexport(data_adjust)\nexport(data_arrange)\nexport(data_codebook)\nexport(data_duplicated)\nexport(data_extract)\nexport(data_filter)\nexport(data_group)\nexport(data_join)\nexport(data_match)\nexport(data_merge)\nexport(data_modify)\nexport(data_partition)\nexport(data_peek)\nexport(data_read)\nexport(data_relocate)\nexport(data_remove)\nexport(data_rename)\nexport(data_rename_rows)\nexport(data_reorder)\nexport(data_replicate)\nexport(data_restoretype)\nexport(data_rotate)\nexport(data_seek)\nexport(data_select)\nexport(data_separate)\nexport(data_summary)\nexport(data_tabulate)\nexport(data_to_long)\nexport(data_to_wide)\nexport(data_transpose)\nexport(data_ungroup)\nexport(data_unique)\nexport(data_unite)\nexport(data_write)\nexport(degroup)\nexport(demean)\nexport(describe_distribution)\nexport(detrend)\nexport(display)\nexport(distribution_coef_var)\nexport(distribution_mode)\nexport(empty_columns)\nexport(empty_rows)\nexport(extract_column_names)\nexport(find_columns)\nexport(kurtosis)\nexport(labels_to_levels)\nexport(mean_sd)\nexport(means_by_group)\nexport(median_mad)\nexport(normalize)\nexport(print_html)\nexport(print_md)\nexport(ranktransform)\nexport(recode_into)\nexport(recode_values)\nexport(remove_empty)\nexport(remove_empty_columns)\nexport(remove_empty_rows)\nexport(replace_nan_inf)\nexport(rescale)\nexport(rescale_weights)\nexport(reshape_ci)\nexport(reshape_longer)\nexport(reshape_wider)\nexport(reverse)\nexport(reverse_scale)\nexport(row_count)\nexport(row_means)\nexport(row_sums)\nexport(row_to_colnames)\nexport(rowid_as_column)\nexport(rownames_as_column)\nexport(skewness)\nexport(slide)\nexport(smoothness)\nexport(standardise)\nexport(standardize)\nexport(text_concatenate)\nexport(text_format)\nexport(text_fullstop)\nexport(text_lastchar)\nexport(text_pas
te)\nexport(text_remove)\nexport(text_wrap)\nexport(to_factor)\nexport(to_numeric)\nexport(unnormalize)\nexport(unstandardise)\nexport(unstandardize)\nexport(visualisation_recipe)\nexport(weighted_mad)\nexport(weighted_mean)\nexport(weighted_median)\nexport(weighted_sd)\nexport(winsorize)\nimportFrom(insight,display)\nimportFrom(insight,print_html)\nimportFrom(insight,print_md)\nimportFrom(stats,makepredictcall)\n"
  },
  {
    "path": "NEWS.md",
    "content": "# datawizard 1.3.1\n\nCHANGES\n\n* `data_summary()` now allows expressions to return more than one summary\n  value. For each value, a new column is created. Additionally, the optional\n  `suffix` argument controls the naming of these columns; if `suffix = NULL`,\n  column names are auto-generated (e.g., with numeric suffixes).\n\n* `standardize()` now works on `fixest` estimations (#665).\n\n* `data_read()` and `data_write()` gain a `password` argument, to encrypt and\n  decrypt data files. This currently only works for R file formats (`.rda`,\n  `.rds`, and `.rdata`). Data encryption is based on the AES-GCM algorithm using\n  the `openssl::aes_gcm_encrypt()` function (#675).\n\nFIXES\n\n* Fix a test due to R-devel change (#677).\n\n# datawizard 1.3.0\n\nBREAKING CHANGES\n\n* Argument `values_fill` in `data_to_wide()` is now defunct, because it did not\n  work as intended (#645).\n\n* `data_to_wide()` no longer removes empty columns that were created after\n  widening data frames, to behave similarly to `tidyr::pivot_wider()` (#645).\n\nCHANGES\n\n* `data_tabulate()` now saves the table of proportions for crosstables as\n  attribute, accessible via the new `as.prop.table()` method (#656).\n\n* Due to changes in the package `insight`, `data_tabulate()` no longer prints\n  decimals when all values in a column are integers (#641).\n\n* Argument `values_from` in `data_to_wide()` now supports select-helpers like\n  the `select` argument in other `{datawizard}` functions (#645).\n\n* Added a `display()` method for `data_codebook()` (#646).\n\n* `display()` methods now support the `{tinytable}` package. 
Use `format = \"tt\"`\n  to export tables as `tinytable` objects (#646).\n\n* Improved performance for several functions that process grouped data frames\n  when the input is a grouped `tibble` (#651).\n\nBUG FIXES\n\n* Fixed an issue when `demean()`ing nested structures with more than 2 grouping\n  variables (#635).\n\n* Fixed an issue when `demean()`ing crossed structures with more than 2 grouping\n  variables (#638).\n\n* Fixed issue in `data_to_wide()` with multiple variables assigned in\n  `values_from` when IDs were not balanced (equally spread across observations)\n  (#644).\n\n* Fixed issue in `data_replicate()` when data frame had only one column to\n  replicate (#654).\n\n# datawizard 1.2.0\n\nBREAKING CHANGES\n\n* The following deprecated arguments have been removed (#603):\n  - `drop_na` in `data_match()`\n  - `safe`, `pattern`, and `verbose` in `data_rename()`\n\nCHANGES\n\n* `data_read()` and `data_write()` now support the `.parquet` file format, via\n  the *nanoparquet* package (#625).\n\n* `data_tabulate()` gets a `display()` method (#627).\n\n* `data_tabulate()` gets an `as.table()` method to coerce the frequency or\n  contingency table into a (list of) `table()` object(s). This can be useful for\n  further statistical analysis, e.g. 
in combination with `chisq.test()` (#629).\n\n* The `print()` method for `data_tabulate()` now appears in the documentation,\n  making the `big_mark` argument visible (#627).\n\nBUG FIXES\n\n* Fixed an issue when printing cross tables using `data_tabulate(by = ...)`,\n  which was caused by the recent changes in `insight::export_table()`.\n\n* Fixed another issue when printing cross tables using `data_tabulate(by = ...)`,\n  when more than one variable was selected for `select` (#630).\n\n* Fixed typo in the documentation of `data_match()`.\n\n# datawizard 1.1.0\n\nBREAKING CHANGES\n\n* `data_read()` now also returns Bayesian models from packages *brms* and\n  *rstanarm* as original model objects, and no longer coerces them into data\n  frames (#606).\n\n* The output format of `describe_distribution()` on grouped data has changed.\n  Before, it printed one table per group combination. Now, it prints a single\n  table with group columns at the start (#610).\n\n* The output format of `describe_distribution()` when confidence intervals are\n  requested has changed. Now, for each centrality measure a confidence interval\n  is calculated (#617).\n\n* `data_modify()` now always uses values of a vector for a modified or newly\n  created variable, and no longer tries to detect whether a character value\n  possibly contains an expression. To allow expression provided as string (or\n  character vectors), use the helper-function `as_expr()`. Only literal\n  expressions or strings wrapped in `as_expr()` will be evaluated as\n  expressions, everything else will be treated as vector with values for new\n  variables (#605).\n\nCHANGES\n\n* `display()` is now re-exported from package *insight*.\n\n* `data_read()` and `data_write()` now rely on base-R functions for files of\n  type `.rds`, `.rda` or `.rdata`. 
Thus, package *rio* is no longer required\n  to be installed for these file types (#607).\n\n* `data_codebook()` gives an informative warning when no column names matched\n  the selection pattern (#601).\n\n* `data_to_long()` now errors when columns selected to reshape do not exist in\n  the data, to avoid nonsensical results that could be missed (#602).\n\n* New argument `by` in `describe_distribution()` (#604).\n\n* `describe_distribution()` now gives informative errors when column names\n  in the input data frame conflict with columns from the output table (#612).\n\n* The methods for `parameters_distribution` objects are now defined in\n  `datawizard` (they were previously in `parameters`) (#613).\n\nBUG FIXES\n\n* Fixed bug in `data_to_wide()`, where new column names in `names_from` were\n  ignored when that column only contained one unique value.\n\n* Fixed bug in `describe_distribution()` when some group combinations\n  didn't appear in the data (#609).\n\n* Fixed bug in `describe_distribution()` when more than one value for the\n  `centrality` argument was specified (#617).\n\n* Fixed bug in `describe_distribution()` where setting `verbose = FALSE`\n  didn't hide some warnings (#617).\n\n* Fixed warning in `data_summary()` when a variable had the same name as\n  another object in the global environment (#585).\n\n# datawizard 1.0.2\n\nBUG FIXES\n\n* Fixed failing R CMD check on ATLAS, noLD, and OpenBLAS due to small numerical\n  differences (#592).\n\n# datawizard 1.0.1\n\nBUG FIXES\n\n* Fixed issue in `data_arrange()` for data frames that only had one column.\n  Formerly, the data frame was coerced into a vector, now the data frame class\n  is preserved.\n\n* Fixed issue in R-devel (4.5.0) due to a change in how `grep()` handles logical\n  arguments with missing values (#588).\n\n# datawizard 1.0.0\n\nBREAKING CHANGES AND DEPRECATIONS\n\n* *datawizard* now requires R >= 4.0 (#515).\n\n* Argument `drop_na` in `data_match()` is deprecated now. 
Please use\n  `remove_na` instead (#556).\n\n* In `data_rename()` (#567):\n  - argument `pattern` is deprecated. Use `select` instead.\n  - argument `safe` is deprecated. The function now errors when `select`\n    contains unknown column names.\n  - when `replacement` is `NULL`, an error is now thrown (previously, column\n    indices were used as new names).\n  - if `select` (previously `pattern`) is a named vector, then all elements\n    must be named, e.g. `c(length = \"Sepal.Length\", \"Sepal.Width\")` errors.\n\n* Order of arguments `by` and `probability_weights` in `rescale_weights()` has\n  changed, because for `method = \"kish\"`, the `by` argument is optional (#575).\n\n* The names of the rescaled weights variables in `rescale_weights()` have been\n  renamed. `pweights_a` and `pweights_b` are now named `rescaled_weights_a`\n  and `rescaled_weights_b` (#575).\n\n* `print()` methods for `data_tabulate()` with multiple sub-tables (i.e. when\n  length of `by` was > 1) were revised. Now, an integrated table instead of\n  multiple tables is returned. Furthermore, `print_html()` did not work, which\n  was also fixed now (#577).\n\n* `demean()` (and `degroup()`) gets an `append` argument that defaults to `TRUE`,\n  to append the centered variables to the original data frame, instead of\n  returning the de- and group-meaned variables only. Use `append = FALSE`\n  for the previous default behaviour (i.e. only returning the newly created\n  variables) (#579).\n\nCHANGES\n\n* `rescale_weights()` gets a `method` argument, to choose method to rescale\n  weights. Options are `\"carle\"` (the default) and `\"kish\"` (#575).\n\n* The `select` argument, which is available in different functions to select\n  variables, can now also be a character vector with quoted variable names,\n  including a colon to indicate a range of several variables (e.g. 
`\"cyl:gear\"`)\n  (#551).\n\n* New function `row_sums()`, to calculate row sums (optionally with minimum\n  amount of valid values), as complement to `row_means()` (#552).\n\n* New function `row_count()`, to count specific values row-wise (#553).\n\n* `data_read()` no longer shows warning about forthcoming breaking changes\n  in upstream packages when reading `.RData` files (#557).\n\n* `data_modify()` now recognizes `n()`, for example to create an index for data\n  groups with `1:n()` (#535).\n\n* The `replacement` argument in `data_rename()` now supports glue-styled\n  tokens  (#563).\n\n* `data_summary()` also accepts the results of `bayestestR::ci()` as summary\n  function (#483).\n\n* `ranktransform()` has a new argument `zeros` to determine how zeros should be\n  handled when `sign = TRUE` (#573).\n\nBUG FIXES\n\n* `describe_distribution()` no longer errors if the sample was too sparse to compute\n  CIs. Instead, it warns the user and returns `NA` (#550).\n\n* `data_read()` preserves variable types when importing files from `rds` or\n  `rdata` format (#558).\n\n# datawizard 0.13.0\n\nBREAKING CHANGES\n\n* `data_rename()` now errors when the `replacement` argument contains `NA` values\n  or empty strings (#539).\n\n* Removed deprecated functions `get_columns()`, `data_find()`, `format_text()` (#546).\n\n* Removed deprecated arguments `group` and `na.rm` in multiple functions. Use `by` and `remove_na` instead (#546).\n\n* The default value for the argument `dummy_factors` in `to_numeric()` has\n  changed from `TRUE` to `FALSE` (#544).\n\nCHANGES\n\n* The `pattern` argument in `data_rename()` can also be a named vector. In this\n  case, names are used as values for the `replacement` argument (i.e. 
`pattern`\n  can be a character vector using `<new name> = \"<old name>\"`).\n\n* `categorize()` gains a new `breaks` argument, to decide whether breaks are\n  inclusive or exclusive (#548).\n\n* The `labels` argument in `categorize()` gets two new options, `\"range\"` and\n  `\"observed\"`, to use the range of categorized values as labels (i.e. factor\n  levels) (#548).\n\n* Minor additions to `reshape_ci()` to work with forthcoming changes in the\n  `{bayestestR}` package.\n\n# datawizard 0.12.3\n\nCHANGES\n\n* `demean()` (and `degroup()`) now also work for nested designs, if argument\n  `nested = TRUE` and  `by` specifies more than one variable (#533).\n\n* Vignettes are no longer provided in the package, they are now only available\n  on the website. There is only one \"Overview\" vignette available in the package,\n  it contains links to the other vignettes on the website. This is because there\n  are CRAN errors occurring when building vignettes on macOS and we couldn't\n  determine the cause after multiple patch releases (#534).\n\n# datawizard 0.12.2\n\n* Remove `htmltools` from `Suggests` in an attempt of fixing an error in CRAN\n  checks due to failures to build a vignette (#528).\n\n# datawizard 0.12.1\n\nThis is a patch release to fix one error on CRAN checks occurring because of a\nmissing package namespace in one of the vignettes.\n\n# datawizard 0.12.0\n\nBREAKING CHANGES\n\n* The argument `include_na` in `data_tabulate()` and `data_summary()` has been\n  renamed into `remove_na`. Consequently, to mimic former behaviour, `FALSE` and\n  `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former\n  `include_na = FALSE`).\n\n* Class names for objects returned by `data_tabulate()` have been changed to\n  `datawizard_table` and `datawizard_crosstable` (resp. 
the plural forms,\n  `*_tables`), to provide a clearer and more consistent naming scheme.\n\nCHANGES\n\n* `data_select()` can directly rename selected variables when a named vector\n  is provided in `select`, e.g. `data_select(mtcars, c(new1 = \"mpg\", new2 = \"cyl\"))`.\n\n* `data_tabulate()` gains an `as.data.frame()` method, to return the frequency\n  table as a data frame. The structure of the returned object is a nested data\n  frame, where the first column contains name of the variable for which\n  frequencies were calculated, and the second column contains the frequency table.\n\n* `demean()` (and `degroup()`) now also work for cross-classified designs, or\n  more generally, for data with multiple grouping or cluster variables (i.e.\n  `by` can now specify more than one variable).\n\n# datawizard 0.11.0\n\nBREAKING CHANGES\n\n* Arguments named `group` or `group_by` are deprecated and will be removed\n  in a future release. Please use `by` instead. This affects the following\n  functions in *datawizard* (#502).\n\n  * `data_partition()`\n  * `demean()` and `degroup()`\n  * `means_by_group()`\n  * `rescale_weights()`\n\n* Following aliases are deprecated and will be removed in a future release (#504):\n\n  * `get_columns()`, use `data_select()` instead.\n  * `data_find()` and `find_columns()`, use `extract_column_names()` instead.\n  * `format_text()`, use `text_format()` instead.\n\nCHANGES\n\n* `recode_into()` is more relaxed regarding checking the type of `NA` values.\n  If you recode into a numeric variable, and one of the recode values is `NA`,\n  you no longer need to use `NA_real_` for numeric `NA` values.\n\n* Improved documentation for some functions.\n\nBUG FIXES\n\n* `data_to_long()` did not work for data frame where columns had attributes\n  (like labelled data).\n\n# datawizard 0.10.0\n\nBREAKING CHANGES\n\n* The following arguments were deprecated in 0.5.0 and are now removed:\n\n  * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep`\n  * 
in `data_to_long()`: `colnames_to`\n  * in `data_partition()`: `training_proportion`\n\nNEW FUNCTIONS\n\n* `data_summary()`, to compute summary statistics of (grouped) data frames.\n\n* `data_replicate()`, to expand a data frame by replicating rows based on another\n  variable that contains the counts of replications per row.\n\nCHANGES\n\n* `data_modify()` gets three new arguments, `.at`, `.if` and `.modify`, to modify\n  variables at specific positions or based on logical conditions.\n\n* `data_tabulate()` was revised and gets several new arguments: a `weights`\n  argument, to compute weighted frequency tables. `include_na` allows to include\n  or omit missing values from the table. Furthermore, a `by` argument was added,\n  to compute crosstables (#479, #481).\n\n# datawizard 0.9.1\n\nCHANGES\n\n* `rescale()` gains `multiply` and `add` arguments, to expand ranges by a given\n  factor or value.\n\n* `to_factor()` and `to_numeric()` now support class `haven_labelled`.\n\nBUG FIXES\n\n* `to_numeric()` now correctly deals with inversed factor levels when\n  `preserve_levels = TRUE`.\n\n* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`.\n\n* `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`.\n\n# datawizard 0.9.0\n\nNEW FUNCTIONS\n\n* `row_means()`, to compute row means, optionally only for the rows with at\n  least `min_valid` non-missing values.\n\n* `contr.deviation()` for sum-deviation contrast coding of factors.\n\n* `means_by_group()`, to compute mean values of variables, grouped by levels\n  of specified factors.\n\n* `data_seek()`, to seek for variables in a data frame, based on their\n  column names, variables labels, value labels or factor levels. Searching for\n  labels only works for \"labelled\" data, i.e. 
when variables have a `label` or\n  `labels` attribute.\n\nCHANGES\n\n* `recode_into()` gains an `overwrite` argument to skip overwriting already\n  recoded cases when multiple recode patterns apply to the same case.\n\n* `recode_into()` gains a `preserve_na` argument to preserve `NA` values\n  when recoding.\n\n* `data_read()` now passes the `encoding` argument to `data.table::fread()`.\n  This allows reading files with non-ASCII characters.\n\n* `datawizard` moves from the GPL-3 license to the MIT license.\n\n* `unnormalize()` and `unstandardize()` now work with grouped data (#415).\n\n* `unnormalize()` now errors instead of emitting a warning if it doesn't have the\n  necessary info (#415).\n\nBUG FIXES\n\n* Fixed issue in `labels_to_levels()` when values of labels were not in sorted\n  order and values were not sequentially numbered.\n\n* Fixed issues in `data_write()` when writing labelled data into SPSS format\n  and vectors were of a different type than value labels.\n\n* Fixed issues in `data_write()` when writing labelled data into SPSS format\n  for character vectors with missing value labels, but existing variable\n  labels.\n\n* Fixed issue in `recode_into()` with probably wrong case number printed in the\n  warning when several recode patterns match to one case.\n\n* Fixed issue in `recode_into()` when original data contained `NA` values and\n  `NA` was not included in the recode pattern.\n\n* Fixed issue in `data_filter()` where functions containing a `=` (e.g. 
when\n  naming arguments, like `grepl(pattern, x = a)`) were mistakenly seen as\n  faulty syntax.\n\n* Fixed issue in `empty_column()` for strings with invalid multibyte strings.\n  For such data frames or files, `empty_column()` or `data_read()` no longer\n  fails.\n\n# datawizard 0.8.0\n\nBREAKING CHANGES\n\n* The following re-exported functions from `{insight}` have now been removed:\n  `object_has_names()`, `object_has_rownames()`, `is_empty_object()`,\n  `compact_list()`, `compact_character()`.\n\n* Argument `na.rm` was renamed to `remove_na` throughout `{datawizard}` functions.\n  `na.rm` is kept for backward compatibility, but will be deprecated and later\n  removed in future updates.\n\n* The way expressions are defined in `data_filter()` was revised. The `filter`\n  argument was replaced by `...`, allowing to separate multiple expression with\n  a comma (which are then combined with `&`). Furthermore, expressions can now also be\n  defined as strings, or be provided as character vectors, to allow string-friendly\n  programming.\n\nCHANGES\n\n* Weighted-functions (`weighted_sd()`, `weighted_mean()`, ...) gain a `remove_na`\n  argument, to remove or keep missing and infinite values. By default,\n  `remove_na = TRUE`, i.e. missing and infinite values are removed by default.\n\n* `reverse_scale()`, `normalize()` and `rescale()` gain an `append` argument\n  (similar to other data frame methods of transformation functions), to append\n  recoded variables to the input data frame instead of overwriting existing\n  variables.\n\nNEW FUNCTIONS\n\n* `rowid_as_column()` to complement `rownames_as_column()` (and to mimic\n  `tibble::rowid_to_column()`). Note that its behavior is different from\n  `tibble::rowid_to_column()` for grouped data. 
See the Details section in the\n  docs.\n\n* `data_unite()`, to merge values of multiple variables into one new variable.\n\n* `data_separate()`, as counterpart to `data_unite()`, to separate a single\n  variable into multiple new variables.\n\n* `data_modify()`, to create new variables, or modify or remove existing\n  variables in a data frame.\n\nMINOR CHANGES\n\n* `to_numeric()` for variables of type `Date`, `POSIXct` and `POSIXlt` now\n  includes the class name in the warning message.\n\n* Added a `print()` method for `center()`, `standardize()`, `normalize()` and\n  `rescale()`.\n\nBUG FIXES\n\n* `standardize_parameters()` now works when the package namespace is in the model\n  formula (#401).\n\n* `data_merge()` no longer yields a warning for `tibbles` when `join = \"bind\"`.\n\n* `center()` and `standardize()` did not work for grouped data frames (of class\n  `grouped_df`) when `force = TRUE`.\n\n* The `data.frame` method of `describe_distribution()` returns `NULL` instead of\n  an error if no valid variables were passed (for example a factor variable with\n  `include_factors = FALSE`) (#421).\n\n# datawizard 0.7.1\n\nBREAKING CHANGES\n\n* `add_labs()` was renamed into `assign_labels()`. Since `add_labs()` existed\n  only for a few days, there will be no alias for backwards compatibility.\n\nNEW FUNCTIONS\n\n* `labels_to_levels()`, to use value labels of factors as their levels.\n\nMINOR CHANGES\n\n* `data_read()` now checks if the imported object actually is a data frame (or\n  coercible to a data frame), and if not, no longer errors, but gives an\n  informative warning of the type of object that was imported.\n\nBUG FIXES\n\n* Fix test for CRAN check on Mac OS arm64\n\n# datawizard 0.7.0\n\nBREAKING CHANGES\n\n* In selection patterns, expressions like `-var1:var3` to exclude all variables\n  between `var1` and `var3` are no longer accepted. The correct expression is\n  `-(var1:var3)`. 
This is for 2 reasons:\n\n  * to be consistent with the behavior for numerics (`-1:2` is not accepted but\n    `-(1:2)` is);\n  * to be consistent with `dplyr::select()`, which throws a warning and only\n    uses the first variable in the first expression.\n\nNEW FUNCTIONS\n\n* `recode_into()`, similar to `dplyr::case_when()`, to recode values from one\n  or more variables into a new variable.\n\n* `mean_sd()` and `median_mad()` for summarizing vectors to their mean (or\n  median) and a range of one SD (or MAD) above and below.\n\n* `data_write()` as counterpart to `data_read()`, to write data frames into\n  CSV, SPSS, SAS, Stata files and many other file types. One advantage over\n  existing functions to write data in other packages is that labelled (numeric)\n  data can be converted into factors (with values labels used as factor levels)\n  even for text formats like CSV and similar. This allows exporting \"labelled\"\n  data into those file formats, too.\n\n* `add_labs()`, to manually add value and variable labels as attributes to\n  variables. These attributes are stored as `\"label\"` and `\"labels\"` attributes,\n  similar to the `labelled` class from the _haven_ package.\n\nMINOR CHANGES\n\n* `data_rename()` gets a `verbose` argument.\n* `winsorize()` now errors if the threshold is incorrect (previously, it provided\n  a warning and returned the unchanged data). The argument `verbose` is now\n  useless but is kept for backward compatibility. The documentation now contains\n  details about the valid values for `threshold` (#357).\n* In all functions that have arguments `select` and/or `exclude`, there is now\n  one warning per misspelled variable. 
The previous behavior was to have only one\n  warning.\n* Fixed inconsistent behaviour in `standardize()` when only one of the arguments\n  `center` or `scale` were provided (#365).\n* `unstandardize()` and `replace_nan_inf()` now work with select helpers (#376).\n* Added informative warning and error messages to `reverse()`. Furthermore, the\n  docs now describe the `range` argument more clearly (#380).\n* `unnormalize()` errors with unexpected inputs (#383).\n\nBUG FIXES\n\n* `empty_columns()` (and therefore `remove_empty_columns()`) now correctly detects\n  columns containing only `NA_character_` (#349).\n* Select helpers now work in custom functions when argument is called `select`\n  (#356).\n* Fix unexpected warning in `convert_na_to()` when `select` is a list (#352).\n* Fixed issue with correct labelling of numeric variables with more than nine\n  unique values and associated value labels.\n\n\n# datawizard 0.6.5\n\nMAJOR CHANGES\n\n* Etienne Bacher is the new maintainer.\n\nMINOR CHANGES\n\n* `standardize()`, `center()`, `normalize()` and `rescale()` can be used in\n  model formulas, similar to `base::scale()`.\n\n* `data_codebook()` now includes the proportion for each category/value, in\n  addition to the counts. 
Furthermore, if data contains tagged `NA` values,\n  these are included in the frequency table.\n\nBUG FIXES\n\n* `center(x)` now works correctly when `x` is a single value and either\n  `reference` or `center` is specified (#324).\n\n* Fixed issue in `data_codebook()`, which failed for labelled vectors when\n  values of labels were not in sorted order.\n\n# datawizard 0.6.4\n\nNEW FUNCTIONS\n\n* `data_codebook()`: to generate codebooks of data frames.\n\n* New functions to deal with duplicates: `data_duplicated()` (keep all duplicates,\n  including the first occurrence) and `data_unique()` (returns the data, excluding\n  all duplicates except one instance of each, based on the selected method).\n\nMINOR CHANGES\n\n* `.data.frame` methods should now preserve custom attributes.\n\n* The `include_bounds` argument in `normalize()` can now also be a numeric\n  value, defining the limit to the upper and lower bound (i.e. the distance\n  to 1 and 0).\n\n* `data_filter()` now works with grouped data.\n\nBUG FIXES\n\n* `data_read()` no longer prints message for empty columns when the data\n  actually had no empty columns.\n\n * `data_to_wide()` now drops columns that are not in `id_cols` (if specified),\n  `names_from`, or `values_from`. This is the behaviour observed in `tidyr::pivot_wider()`.\n\n# datawizard 0.6.3\n\nMAJOR CHANGES\n\n* There is a new publication about the `{datawizard}` package:\n  <https://joss.theoj.org/papers/10.21105/joss.04684>\n\n* Fixes failing tests due to changes in `R-devel`.\n\n* `data_to_long()` and `data_to_wide()` have had significant performance\n  improvements, sometimes as high as a ten-fold speedup.\n\nMINOR CHANGES\n\n* When column names are misspelled, most functions now suggest which existing\n  columns possibly could be meant.\n\n* Miscellaneous performance gains.\n\n* `convert_to_na()` now requires argument `na` to be of class 'Date' to convert\n  specific dates to `NA`. 
For example, `convert_to_na(x, na = \"2022-10-17\")`\n  must be changed to `convert_to_na(x, na = as.Date(\"2022-10-17\"))`.\n\nBUG FIXES\n\n* `data_to_long()` and `data_to_wide()` now correctly keep the `date` format.\n\n# datawizard 0.6.2\n\nBREAKING CHANGES\n\n* Methods for grouped data frames (`.grouped_df`) no longer support\n  `dplyr::group_by()` for `{dplyr}` before version `0.8.0`.\n\n* `empty_columns()` and `remove_empty_columns()` now also remove columns that\n  contain only empty characters. Likewise, `empty_rows()` and\n  `remove_empty_rows()` remove observations that completely have missing or\n  empty character values.\n\nMINOR CHANGES\n\n* `data_read()` gains a `convert_factors` argument, to turn off automatic\n  conversion from numeric variables into factors.\n\nBUG FIXES\n\n* `data_arrange()` now works with data frames that were grouped using\n  `data_group()` (#274).\n\n# datawizard 0.6.1\n\n* Updates tests for upcoming changes in the `{tidyselect}` package (#267).\n\n# datawizard 0.6.0\n\nBREAKING CHANGES\n\n* The minimum needed R version has been bumped to `3.6`.\n\n* Following deprecated functions have been removed:\n\n`data_cut()`, `data_recode()`, `data_shift()`, `data_reverse()`,\n`data_rescale()`, `data_to_factor()`, `data_to_numeric()`\n\n* New `text_format()` alias is introduced for `format_text()`, latter of which\n  will be removed in the next release.\n\n* New `recode_values()` alias is introduced for `change_code()`, latter of which\n  will be removed in the next release.\n\n* `data_merge()` now errors if columns specified in `by` are not in both\n  datasets.\n\n* Using negative values in arguments `select` and `exclude` now removes the\n  columns from the selection/exclusion. 
The previous behavior was to start the\n  selection/exclusion from the end of the dataset, which was inconsistent with\n  the use of \"-\" with other selecting possibilities.\n\nNEW FUNCTIONS\n\n* `data_peek()`: to peek at values and type of variables in a data frame.\n\n* `coef_var()`: to compute the coefficient of variation.\n\nCHANGES\n\n* `data_filter()` will give more informative messages on malformed syntax of the\n  `filter` argument.\n\n* It is now possible to use curly brackets to pass variable names to\n  `data_filter()`, like the following example. See examples section in the\n  documentation of `data_filter()`.\n\n* The `regex` argument was added to functions that use select-helpers and did\n  not already have this argument.\n\n* Select helpers `starts_with()`, `ends_with()`, and `contains()` now accept\n  several patterns, e.g `starts_with(\"Sep\", \"Petal\")`.\n\n* Arguments `select` and `exclude` that are present in most functions have been\n  improved to work in loops and in custom functions. 
For example, the following\n  code now works:\n\n```r\nfoo <- function(data) {\n  i <- \"Sep\"\n  find_columns(data, select = starts_with(i))\n}\nfoo(iris)\n\nfor (i in c(\"Sepal\", \"Sp\")) {\n  head(iris) |>\n    find_columns(select = starts_with(i)) |>\n    print()\n}\n```\n\n* There is now a vignette summarizing the various ways to select or exclude\n  variables in most `{datawizard}` functions.\n\n# datawizard 0.5.1\n\n* Fixes failing tests due to `{poorman}` update.\n\n# datawizard 0.5.0\n\nMAJOR CHANGES\n\n* Following statistical transformation functions have been renamed to not have\n  `data_*()` prefix, since they do not work exclusively with data frames, but\n  are typically first of all used with vectors, and therefore had misleading\n  names:\n\n  - `data_cut()` -> `categorize()`\n\n  - `data_recode()` -> `change_code()`\n\n  - `data_shift()` -> `slide()`\n\n  - `data_reverse()` -> `reverse()`\n\n  - `data_rescale()` -> `rescale()`\n\n  - `data_to_factor()` -> `to_factor()`\n\n  - `data_to_numeric()` -> `to_numeric()`\n\nNote that these functions also have `.data.frame()` methods and still work for\ndata frames as well. Former function names are still available as aliases, but\nwill be deprecated and removed in a future release.\n\n* Bumps the needed minimum R version to `3.5`.\n\n* Removed deprecated function `data_findcols()`. Please use its replacement,\n  `data_find()`.\n\n* Removed alias `extract()` for `data_extract()` function since it collided with\n  `tidyr::extract()`.\n\n* Argument `training_proportion` in `data_partition()` is deprecated. 
Please use\n  `proportion` now.\n\n* Given his continued and significant contributions to the package, Etienne\n  Bacher (@etiennebacher) is now included as an author.\n\n* `unstandardise()` now works for `center(x)`\n\n* `unnormalize()` now works for `change_scale(x)`\n\n* `reshape_wider()` now follows more consistently `tidyr::pivot_wider()` syntax.\n  Arguments `colnames_from`, `sep`, and `rows_from` are deprecated and should be\n  replaced by `names_from`, `names_sep`, and `id_cols` respectively.\n  `reshape_wider()` also gains an argument `names_glue` (#182, #198).\n\n* Similarly, `reshape_longer()` now follows more consistently\n  `tidyr::pivot_longer()` syntax. Argument `colnames_to` is deprecated and\n  should be replaced by `names_to`. `reshape_longer()` also gains new arguments:\n  `names_prefix`, `names_sep`, `names_pattern`, and `values_drop_na` (#189).\n\nCHANGES\n\n* Some of the text formatting helpers (like `text_concatenate()`) gain an\n  `enclose` argument, to wrap text elements with surrounding characters.\n\n* `winsorize` now accepts \"raw\" and \"zscore\" methods (in addition to\n  \"percentile\"). Additionally, when `robust` is set to `TRUE` together with\n  `method = \"zscore\"`, winsorizes via the median and median absolute deviation\n  (MAD); else via the mean and standard deviation. (@rempsyc, #177, #49, #47).\n\n* `convert_na_to` now accepts numeric replacements on character vectors and\n  single replacement for multiple vector classes. 
(@rempsyc, #214).\n\n* `data_partition()` now allows to create multiple partitions from the data,\n  returning multiple training and a remaining test set.\n\n* Functions like `center()`, `normalize()` or `standardize()` no longer fail\n  when data contains infinite values (`Inf`).\n\nNEW FUNCTIONS\n\n* `row_to_colnames()` and `colnames_to_row()` to move a row to column names, and\n  column names to row (@etiennebacher, #169).\n\n* `data_arrange()` to sort the rows of a dataframe according to the values of\n  the selected columns.\n\nBUG FIXES\n\n* Fixed wrong column names in `data_to_wide()` (#173).\n\n# datawizard 0.4.1\n\nBREAKING\n\n* Added the `standardize.default()` method (moved from package **effectsize**),\n  to be consistent in that the default-method now is in the same package as the\n  generic. `standardize.default()` behaves exactly like in **effectsize** and\n  particularly works for regression model objects. **effectsize** now re-exports\n  `standardize()` from **datawizard**.\n\nNEW FUNCTIONS\n\n* `data_shift()` to shift the value range of numeric variables.\n\n* `data_recode()` to recode old into new values.\n\n* `data_to_factor()` as counterpart to `data_to_numeric()`.\n\n* `data_tabulate()` to create frequency tables of variables.\n\n* `data_read()` to read (import) data files (from text, or foreign statistical\n  packages).\n\n* `unnormalize()` as counterpart to `normalize()`. This function only works for\n  variables that have been normalized with `normalize()`.\n\n* `data_group()` and `data_ungroup()` to create grouped data frames, or to\n  remove the grouping information from grouped data frames.\n\nCHANGES\n\n* `data_find()` was added as alias to `find_columns()`, to have consistent name\n  patterns for the **datawizard** functions. 
`data_findcols()` will be removed\n  in a future update and usage is discouraged.\n\n* The `select` argument (and thus, also the `exclude` argument) now also accepts\n  functions testing for logical conditions, e.g. `is.numeric()` (or\n  `is.numeric`), or any user-defined function that selects the variables for\n  which the function returns `TRUE` (like: `foo <- function(x) mean(x) > 3`).\n\n* Arguments `select` and `exclude` now allow the negation of select-helpers,\n  like `-ends_with(\"\")`, `-is.numeric` or `-Sepal.Width:Petal.Length`.\n\n* Many functions now get a `.default` method, to capture unsupported classes.\n  This now yields a message and returns the original input, and hence, the\n  `.data.frame` methods won't stop due to an error.\n\n* The `filter` argument in `data_filter()` can also be a numeric vector, to\n  indicate row indices of those rows that should be returned.\n\n* `convert_to_na()` gets methods for variables of class `logical` and `Date`.\n\n* `convert_to_na()` for factors (and data frames) gains a `drop_levels`\n  argument, to drop unused levels that have been replaced by `NA`.\n\n* `data_to_numeric()` gains two more arguments, `preserve_levels` and `lowest`,\n  to give better control of conversion of factors.\n\nBUG FIXES\n\n* When logicals were passed to `center()` or `standardize()` and `force = TRUE`,\n  these were not properly converted to numeric variables.\n\n# datawizard 0.4.0\n\nMAJOR CHANGES\n\n* `data_match()` now returns filtered data by default. Old behavior (returning\n  rows indices) can be set by setting `return_indices = TRUE`.\n\n* The following functions are now re-exported from `{insight}` package:\n  `object_has_names()`, `object_has_rownames()`, `is_empty_object()`,\n  `compact_list()`, `compact_character()`\n\n* `data_findcols()` will become deprecated in future updates. 
Please use the new\n  replacements `find_columns()` and `get_columns()`.\n\n* The vignette *Analysing Longitudinal or Panel Data* has now moved to\n  [parameters\n  package](https://easystats.github.io/parameters/articles/demean.html).\n\nNEW FUNCTIONS\n\n* To convert rownames to a column, and *vice versa*: `rownames_as_column()` and\n  `column_as_rownames()` (@etiennebacher, #80).\n\n* `find_columns()` and `get_columns()` to find column names or retrieve subsets\n  of data frames, based on various select-methods (including select-helpers).\n  These functions will supersede `data_findcols()` in the future.\n\n* `data_filter()` as complement for `data_match()`, which works with logical\n  expressions for filtering rows of data frames.\n\n* For computing weighted centrality measures and dispersion: `weighted_mean()`,\n  `weighted_median()`, `weighted_sd()` and `weighted_mad()`.\n\n* To replace `NA` in vectors and dataframes: `convert_na_to()` (@etiennebacher,\n  #111).\n\nMINOR CHANGES\n\n* The `select` argument in several functions (like `data_remove()`,\n  `reshape_longer()`, or `data_extract()`) now allows the use of select-helpers\n  for selecting variables based on specific patterns.\n\n* `data_extract()` gains new arguments to allow type-safe return values,\n\ni.e. *always* return a vector *or* a data frame. Thus, `data_extract()` can now\nbe used to select multiple variables or pull a single variable from data\nframes.\n\n* `data_match()` gains a `match` argument, to indicate with which logical\n  operation matching results should be combined.\n\n* Improved support for *labelled data* for many functions, i.e. 
returned data\n  frame will preserve value and variable label attributes, where possible and\n  applicable.\n\n* `describe_distribution()` now works with lists (@etiennebacher, #105).\n\n* `data_rename()` doesn't use `pattern` anymore to rename the columns if\n  `replacement` is not provided (@etiennebacher, #103).\n\n* `data_rename()` now adds a suffix to duplicated names in `replacement`\n  (@etiennebacher, #103).\n\nBUG FIXES\n\n* `data_to_numeric()` produced wrong results for factors when `dummy_factors =\n  TRUE` and factor contained missing values.\n\n* `data_match()` produced wrong results when data contained missing values.\n\n* Fixed CRAN check issues in `data_extract()` when more than one variable was\n  extracted from a data frame.\n\n# datawizard 0.3.0\n\nNEW FUNCTIONS\n\n  * To find or remove empty rows and columns in a data frame: `empty_rows()`,\n    `empty_columns()`, `remove_empty_rows()`, `remove_empty_columns()`, and\n    `remove_empty`.\n\n  * To check for names: `object_has_names()` and `object_has_rownames()`.\n\n  * To rotate data frames: `data_rotate()`.\n\n  * To reverse score variables: `data_reverse()`.\n\n  * To merge/join multiple data frames: `data_merge()` (or its alias\n    `data_join()`).\n\n  * To cut (recode) data into groups: `data_cut()`.\n\n  * To replace specific values with `NA`s: `convert_to_na()`.\n\n  * To replace `Inf` and `NaN` values with `NA`s: `replace_nan_inf()`.\n\n- Arguments `cols`, `before` and `after` in `data_relocate()` can now also be\n  numeric values, indicating the position of the destination column.\n\n# datawizard 0.2.3\n\n- New functions:\n\n  * to work with lists: `is_empty_object()` and `compact_list()`\n\n  * to work with strings: `compact_character()`\n\n# datawizard 0.2.2\n\n- New function `data_extract()` (or its alias `extract()`) to pull single\n  variables from a data frame, possibly naming each value by the row names of\n  that data frame.\n\n- `reshape_ci()` gains a `ci_type` argument, to 
reshape data frames where\n  CI-columns have prefixes other than `\"CI\"`.\n\n- `standardize()` and `center()` gain arguments `center` and `scale`, to define\n  references for centrality and deviation that are used when centering or\n  standardizing variables.\n\n- `center()` gains the arguments `force` and `reference`, similar to\n  `standardize()`.\n\n- The functionality of the `append` argument in `center()` and `standardize()`\n  was revised. This made the `suffix` argument redundant, and thus it was\n  removed.\n\n- Fixed issue in `standardize()`.\n\n- Fixed issue in `data_findcols()`.\n\n# datawizard 0.2.1\n\n- Exports `plot` method for `visualisation_recipe()` objects from `{see}`\n  package.\n\n- `centre()`, `standardise()`, `unstandardise()` are exported as aliases for\n  `center()`, `standardize()`, `unstandardize()`, respectively.\n\n# datawizard 0.2.0.1\n\n- This is mainly a maintenance release that addresses some issues with\n  conflicting namespaces.\n\n# datawizard 0.2.0\n\n- New function: `visualisation_recipe()`.\n\n- The following function has now moved to *performance* package:\n  `check_multimodal()`.\n\n- Minor updates to documentation, including a new vignette about `demean()`.\n\n# datawizard 0.1.0\n\n* First release.\n"
  },
  {
    "path": "R/adjust.R",
    "content": "#' Adjust data for the effect of other variable(s)\n#'\n#' This function can be used to adjust the data for the effect of other\n#' variables present in the dataset. It is based on an underlying fitting of\n#' regressions models, allowing for quite some flexibility, such as including\n#' factors as random effects in mixed models (multilevel partialization),\n#' continuous variables as smooth terms in general additive models (non-linear\n#' partialization) and/or fitting these models under a Bayesian framework. The\n#' values returned by this function are the residuals of the regression models.\n#' Note that a regular correlation between two \"adjusted\" variables is\n#' equivalent to the partial correlation between them.\n#'\n#' @param data A data frame.\n#' @param effect Character vector of column names to be adjusted for (regressed\n#'   out). If `NULL` (the default), all variables will be selected.\n#' @param multilevel If `TRUE`, the factors are included as random factors.\n#'   Else, if `FALSE` (default), they are included as fixed effects in the\n#'   simple regression model.\n#' @param additive If `TRUE`, continuous variables as included as smooth terms\n#'   in additive models. The goal is to regress-out potential non-linear\n#'   effects.\n#' @param bayesian If `TRUE`, the models are fitted under the Bayesian framework\n#'   using `rstanarm`.\n#' @param keep_intercept If `FALSE` (default), the intercept of the model is\n#'   re-added. 
This avoids the centering around 0 that happens by default\n#'   when regressing out another variable (see the examples below for a\n#'   visual representation of this).\n#' @inheritParams extract_column_names\n#' @inheritParams standardize\n#'\n#' @return A data frame comparable to `data`, with adjusted variables.\n#'\n#' @examplesIf all(insight::check_if_installed(c(\"bayestestR\", \"rstanarm\", \"gamm4\"), quietly = TRUE))\n#' adjusted_all <- adjust(attitude)\n#' head(adjusted_all)\n#' adjusted_one <- adjust(attitude, effect = \"complaints\", select = \"rating\")\n#' head(adjusted_one)\n#' \\donttest{\n#' adjust(attitude, effect = \"complaints\", select = \"rating\", bayesian = TRUE)\n#' adjust(attitude, effect = \"complaints\", select = \"rating\", additive = TRUE)\n#' attitude$complaints_LMH <- cut(attitude$complaints, 3)\n#' adjust(attitude, effect = \"complaints_LMH\", select = \"rating\", multilevel = TRUE)\n#' }\n#'\n#' # Generate data\n#' data <- bayestestR::simulate_correlation(n = 100, r = 0.7)\n#' data$V2 <- (5 * data$V2) + 20 # Add intercept\n#'\n#' # Adjust\n#' adjusted <- adjust(data, effect = \"V1\", select = \"V2\")\n#' adjusted_icpt <- adjust(data, effect = \"V1\", select = \"V2\", keep_intercept = TRUE)\n#'\n#' # Visualize\n#' plot(\n#'   data$V1, data$V2,\n#'   pch = 19, col = \"blue\",\n#'   ylim = c(min(adjusted$V2), max(data$V2)),\n#'   main = \"Original (blue), adjusted (green), and adjusted - intercept kept (red) data\"\n#' )\n#' abline(lm(V2 ~ V1, data = data), col = \"blue\")\n#' points(adjusted$V1, adjusted$V2, pch = 19, col = \"green\")\n#' abline(lm(V2 ~ V1, data = adjusted), col = \"green\")\n#' points(adjusted_icpt$V1, adjusted_icpt$V2, pch = 19, col = \"red\")\n#' abline(lm(V2 ~ V1, data = adjusted_icpt), col = \"red\")\n#'\n#' @export\nadjust <- function(\n  data,\n  effect = NULL,\n  select = is.numeric,\n  exclude = NULL,\n  multilevel = FALSE,\n  additive = FALSE,\n  bayesian = FALSE,\n  keep_intercept = FALSE,\n  ignore_case = 
FALSE,\n  regex = FALSE,\n  verbose = FALSE\n) {\n  # make sure column names are syntactically valid\n  .check_dataframe_names(data, action = \"error\")\n\n  # check for formula notation, convert to character vector\n  if (inherits(effect, \"formula\")) {\n    effect <- all.vars(effect)\n  }\n\n  # Find predictors\n  if (is.null(effect)) {\n    effect <- names(data)\n  }\n\n  if (is.null(select)) {\n    select <- is.numeric\n  }\n\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # Factors\n  formula_random <- NULL\n  facs <- names(data[effect][!vapply(data[effect], is.numeric, logical(1L))])\n  if (length(facs) >= 1 && multilevel) {\n    if (additive) {\n      formula_random <- stats::as.formula(paste(\n        \"~\",\n        paste(paste0(\"(1|\", facs, \")\"), collapse = \" + \")\n      ))\n    } else {\n      formula_random <- paste(\n        \"+\",\n        paste(paste0(\"(1|\", facs, \")\"), collapse = \" + \")\n      )\n    }\n    effect <- effect[!effect %in% facs]\n  }\n\n  # Fit models\n  out <- data.frame(.ID = seq_len(nrow(data)))\n  for (var in select) {\n    predictors <- effect[effect != var]\n    if (additive) {\n      predictors_num <- names(data[predictors][vapply(\n        data[predictors],\n        is.numeric,\n        logical(1L)\n      )])\n      predictors[predictors == predictors_num] <- paste0(\n        \"s(\",\n        predictors_num,\n        \")\"\n      )\n    }\n    formula_predictors <- paste(c(\"1\", predictors), collapse = \" + \")\n    model_formula <- paste(var, \"~\", formula_predictors)\n\n    x <- .model_adjust_for(\n      data = data[unique(c(var, effect, facs))],\n      model_formula = model_formula,\n      multilevel = multilevel,\n      additive = additive,\n      bayesian = bayesian,\n      formula_random = formula_random,\n      keep_intercept = keep_intercept\n    )\n    out[var] <- x\n  }\n  out[names(data)[!names(data) %in% 
names(out)]] <- data[names(data)[\n    !names(data) %in% names(out)\n  ]]\n  out[names(data)]\n}\n\n#' @rdname adjust\n#' @export\ndata_adjust <- adjust\n\n\n#' @keywords internal\n.model_adjust_for <- function(\n  data,\n  model_formula,\n  multilevel = FALSE,\n  additive = FALSE,\n  bayesian = FALSE,\n  formula_random = NULL,\n  keep_intercept = FALSE\n) {\n  # Additive -----------------------\n  if (additive) {\n    # Bayesian\n    if (bayesian) {\n      insight::check_if_installed(\"rstanarm\")\n      model <- rstanarm::stan_gamm4(\n        stats::as.formula(model_formula),\n        random = formula_random,\n        data = data,\n        refresh = 0\n      )\n      # Frequentist\n    } else {\n      insight::check_if_installed(\"gamm4\")\n      model <- gamm4::gamm4(\n        stats::as.formula(model_formula),\n        random = formula_random,\n        data = data\n      )\n    }\n\n    # Linear -------------------------\n  } else if (bayesian) {\n    # Bayesian\n    insight::check_if_installed(\"rstanarm\")\n    if (multilevel) {\n      model <- rstanarm::stan_lmer(\n        paste(model_formula, formula_random),\n        data = data,\n        refresh = 0\n      )\n    } else {\n      model <- rstanarm::stan_glm(model_formula, data = data, refresh = 0)\n    }\n  } else if (multilevel) {\n    # Frequentist\n    insight::check_if_installed(\"lme4\")\n    model <- lme4::lmer(paste(model_formula, formula_random), data = data)\n  } else {\n    model <- stats::lm(model_formula, data = data)\n  }\n\n  adjusted <- insight::get_residuals(model)\n\n  # Re-add intercept if need be\n  if (keep_intercept) {\n    intercept <- insight::get_intercept(model)\n    if (length(intercept) > 1) {\n      intercept <- stats::median(intercept)\n    } # For bayesian model\n    if (is.na(intercept)) {\n      intercept <- 0\n    }\n    adjusted <- adjusted + intercept\n  }\n\n  # Deal with missing data\n  out <- rep(NA, nrow(data))\n  out[stats::complete.cases(data)] <- 
as.vector(adjusted)\n\n  out\n}\n"
  },
  {
    "path": "R/assign_labels.R",
    "content": "#' @title Assign variable and value labels\n#' @name assign_labels\n#'\n#' @description\n#' Assign variable and values labels to a variable or variables in a data frame.\n#' Labels are stored as attributes (`\"label\"` for variable labels and `\"labels\"`)\n#' for value labels.\n#'\n#' @param x A data frame, factor or vector.\n#' @param variable The variable label as string.\n#' @param values The value labels as (named) character vector. If `values` is\n#' *not* a named vector, the length of labels must be equal to the length of\n#' unique values. For a named vector, the left-hand side (LHS) is the value in\n#' `x`, the right-hand side (RHS) the associated value label. Non-matching\n#' labels are omitted.\n#' @param ... Currently not used.\n#' @inheritParams extract_column_names\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @return A labelled variable, or a data frame of labelled variables.\n#'\n#' @examples\n#' x <- 1:3\n#' # labelling by providing required number of labels\n#' assign_labels(\n#'   x,\n#'   variable = \"My x\",\n#'   values = c(\"one\", \"two\", \"three\")\n#' )\n#'\n#' # labelling using named vectors\n#' data(iris)\n#' out <- assign_labels(\n#'   iris$Species,\n#'   variable = \"Labelled Species\",\n#'   values = c(`setosa` = \"Spec1\", `versicolor` = \"Spec2\", `virginica` = \"Spec3\")\n#' )\n#' str(out)\n#'\n#' # data frame example\n#' out <- assign_labels(\n#'   iris,\n#'   select = \"Species\",\n#'   variable = \"Labelled Species\",\n#'   values = c(`setosa` = \"Spec1\", `versicolor` = \"Spec2\", `virginica` = \"Spec3\")\n#' )\n#' str(out$Species)\n#'\n#' # Partial labelling\n#' x <- 1:5\n#' assign_labels(\n#'   x,\n#'   variable = \"My x\",\n#'   values = c(`1` = \"lowest\", `5` = \"highest\")\n#' )\n#' @export\nassign_labels <- function(x, ...) {\n  UseMethod(\"assign_labels\")\n}\n\n\n#' @export\nassign_labels.default <- function(x, verbose = TRUE, ...) 
{\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Adding labels currently not possible for variables of class `%s`.\",\n        class(x)[1]\n      )\n    )\n  }\n  x\n}\n\n#' @rdname assign_labels\n#' @export\nassign_labels.numeric <- function(x, variable = NULL, values = NULL, ...) {\n  # add variable label\n  if (!is.null(variable)) {\n    if (is.character(variable) && length(variable) == 1L) {\n      attr(x, \"label\") <- variable\n    } else {\n      insight::format_error(\n        \"Variable labels (argument `variable`) must be provided as a single character string, e.g. `variable = \\\"mylabel\\\"`.\" # nolint\n      )\n    }\n  }\n\n  # if user just wants to add a variable label, skip next steps\n  if (!is.null(values)) {\n    # extract unique values\n    unique_values <- as.vector(sort(stats::na.omit(unique(x))))\n    value_labels <- NULL\n\n    # do we have a names vector for \"values\"?\n    # else check if number of labels and values match\n    if (is.null(names(values))) {\n      if (length(values) == length(unique_values)) {\n        value_labels <- stats::setNames(unique_values, values)\n      } else {\n        insight::format_error(\n          \"Cannot add labels. 
Number of unique values and number of value labels are not equal.\",\n          sprintf(\n            \"There are %i unique values and %i provided labels.\",\n            length(unique_values),\n            length(values)\n          )\n        )\n      }\n    } else {\n      # check whether we have matches of labels and values\n      matching_labels <- names(values) %in% unique_values\n      if (!all(matching_labels)) {\n        insight::format_error(\n          \"Following labels were associated with values that don't exist:\",\n          text_concatenate(\n            paste0(\n              values[!matching_labels],\n              \" (\",\n              names(values)[!matching_labels],\n              \")\"\n            ),\n            enclose = \"`\"\n          )\n        )\n      }\n      values <- values[names(values) %in% unique_values]\n\n      if (length(values)) {\n        # we need to switch names and values\n        value_labels <- stats::setNames(\n          coerce_to_numeric(names(values)),\n          values\n        )\n      }\n    }\n\n    attr(x, \"labels\") <- value_labels\n  }\n\n  x\n}\n\n#' @export\nassign_labels.factor <- assign_labels.numeric\n\n#' @export\nassign_labels.character <- assign_labels.numeric\n\n#' @rdname assign_labels\n#' @export\nassign_labels.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  values = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  x[select] <- lapply(\n    x[select],\n    assign_labels,\n    values = values,\n    verbose = verbose,\n    ...\n  )\n  x\n}\n"
  },
  {
    "path": "R/categorize.R",
    "content": "#' @title Recode (or \"cut\" / \"bin\") data into groups of values.\n#' @name categorize\n#'\n#' @description\n#' This functions divides the range of variables into intervals and recodes\n#' the values inside these intervals according to their related interval.\n#' It is basically a wrapper around base R's `cut()`, providing a simplified\n#' and more accessible way to define the interval breaks (cut-off values).\n#'\n#' @param x A (grouped) data frame, numeric vector or factor.\n#' @param split Character vector, indicating at which breaks to split variables,\n#'   or numeric values with values indicating breaks. If character, may be one\n#'   of `\"median\"`, `\"mean\"`, `\"quantile\"`, `\"equal_length\"`, or `\"equal_range\"`.\n#'   `\"median\"` or `\"mean\"` will return dichotomous variables, split at their\n#'   mean or median, respectively. `\"quantile\"` and `\"equal_length\"` will split\n#'   the variable into `n_groups` groups, where each group refers to an interval\n#'   of a specific range of values. Thus, the length of each interval will be\n#'   based on the number of groups. `\"equal_range\"` also splits the variable\n#'   into multiple groups, however, the length of the interval is given, and\n#'   the number of resulting groups (and hence, the number of breaks) will be\n#'   determined by how many intervals can be generated, based on the full range\n#'   of the variable.\n#' @param n_groups If `split` is `\"quantile\"` or `\"equal_length\"`, this defines\n#'   the number of requested groups (i.e. resulting number of levels or values)\n#'   for the recoded variable(s). `\"quantile\"` will define intervals based\n#'   on the distribution of the variable, while `\"equal_length\"` tries to\n#'   divide the range of the variable into pieces of equal length.\n#' @param range If `split = \"equal_range\"`, this defines the range of values\n#'   that are recoded into a new value.\n#' @param lowest Minimum value of the recoded variable(s). 
If `NULL` (the default),\n#'   for numeric variables, the minimum of the original input is preserved. For\n#'   factors, the default minimum is `1`. For `split = \"equal_range\"`, the\n#'   default minimum is always `1`, unless specified otherwise in `lowest`.\n#' @param breaks Character, indicating whether breaks for categorizing data are\n#'   `\"inclusive\"` (values indicate the _upper_ bound of the _previous_ group or\n#'   interval) or `\"exclusive\"` (values indicate the _lower_ bound of the _next_\n#'   group or interval to begin). Use `labels = \"range\"` to make this behaviour\n#'   easier to see.\n#' @param labels Character vector of value labels. If not `NULL`, `categorize()`\n#'   will returns factors instead of numeric variables, with `labels` used\n#'   for labelling the factor levels. Can also be `\"mean\"`, `\"median\"`,\n#'   `\"range\"` or `\"observed\"` for a factor with labels as the mean/median,\n#'   the requested range (even if not all values of that range are present in\n#'   the data) or observed range (range of the actual recoded values) of each\n#'   group. See 'Examples'.\n#' @param append Logical or string. If `TRUE`, recoded or converted variables\n#'   get new column names and are appended (column bind) to `x`, thus returning\n#'   both the original and the recoded variables. The new columns get a suffix,\n#'   based on the calling function: `\"_r\"` for recode functions, `\"_n\"` for\n#'   `to_numeric()`, `\"_f\"` for `to_factor()`, or `\"_s\"` for\n#'   `slide()`. If `append=FALSE`, original variables in `x` will be\n#'   overwritten by their recoded versions. If a character value, recoded\n#'   variables are appended with new column names (using the defined suffix) to\n#'   the original data frame.\n#' @param ... 
not used.\n#' @inheritParams extract_column_names\n#'\n#' @inherit data_rename seealso\n#'\n#' @details\n#'\n#' # Splits and breaks (cut-off values)\n#'\n#' Breaks are by default _exclusive_, this means that these values indicate\n#' the lower bound of the next group or interval to begin. Take a simple\n#' example, a numeric variable with values from 1 to 9. The median would be 5,\n#' thus the first interval ranges from 1-4 and is recoded into 1, while 5-9\n#' would turn into 2 (compare `cbind(1:9, categorize(1:9))`). The same variable,\n#' using `split = \"quantile\"` and `n_groups = 3` would define breaks at 3.67\n#' and 6.33 (see `quantile(1:9, probs = c(1/3, 2/3))`), which means that values\n#' from 1 to 3 belong to the first interval and are recoded into 1 (because\n#' the next interval starts at 3.67), 4 to 6 into 2 and 7 to 9 into 3.\n#'\n#' The opposite behaviour can be achieved using `breaks = \"inclusive\"`, in which\n#' case\n#'\n#' # Recoding into groups with equal size or range\n#'\n#' `split = \"equal_length\"` and `split = \"equal_range\"` try to divide the\n#' range of `x` into intervals of similar (or same) length. The difference is\n#' that `split = \"equal_length\"` will divide the range of `x` into `n_groups`\n#' pieces and thereby defining the intervals used as breaks (hence, it is\n#' equivalent to `cut(x, breaks = n_groups)`), while  `split = \"equal_range\"`\n#' will cut `x` into intervals that all have the length of `range`, where the\n#' first interval by defaults starts at `1`. The lowest (or starting) value\n#' of that interval can be defined using the `lowest` argument.\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @return `x`, recoded into groups. By default `x` is numeric, unless `labels`\n#'   is specified. In this case, a factor is returned, where the factor levels\n#'   (i.e. 
recoded groups are labelled accordingly.\n#'\n#' @examples\n#' set.seed(123)\n#' x <- sample(1:10, size = 50, replace = TRUE)\n#'\n#' table(x)\n#'\n#' # by default, at median\n#' table(categorize(x))\n#'\n#' # into 3 groups, based on distribution (quantiles)\n#' table(categorize(x, split = \"quantile\", n_groups = 3))\n#'\n#' # into 3 groups, user-defined break\n#' table(categorize(x, split = c(3, 5)))\n#'\n#' set.seed(123)\n#' x <- sample(1:100, size = 500, replace = TRUE)\n#'\n#' # into 5 groups, try to recode into intervals of similar length,\n#' # i.e. the range within groups is the same for all groups\n#' table(categorize(x, split = \"equal_length\", n_groups = 5))\n#'\n#' # into 5 groups, try to return same range within groups\n#' # i.e. 1-20, 21-40, 41-60, etc. Since the range of \"x\" is\n#' # 1-100, and we have a range of 20, this results into 5\n#' # groups, and thus is for this particular case identical\n#' # to the previous result.\n#' table(categorize(x, split = \"equal_range\", range = 20))\n#'\n#' # return factor with value labels instead of numeric value\n#' set.seed(123)\n#' x <- sample(1:10, size = 30, replace = TRUE)\n#' categorize(x, \"equal_length\", n_groups = 3)\n#' categorize(x, \"equal_length\", n_groups = 3, labels = c(\"low\", \"mid\", \"high\"))\n#'\n#' # cut numeric into groups with the mean or median as a label name\n#' x <- sample(1:10, size = 30, replace = TRUE)\n#' categorize(x, \"equal_length\", n_groups = 3, labels = \"mean\")\n#' categorize(x, \"equal_length\", n_groups = 3, labels = \"median\")\n#'\n#' # cut numeric into groups with the requested range as a label name\n#' # each category has the same range, and labels indicate this range\n#' categorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"range\")\n#' # in this example, each category has the same range, but labels only refer\n#' # to the ranges of the actual values (present in the data) inside each group\n#' categorize(mtcars$mpg, \"equal_length\", n_groups = 
5, labels = \"observed\")\n#' @export\ncategorize <- function(x, ...) {\n  UseMethod(\"categorize\")\n}\n\n#' @export\ncategorize.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      paste0(\n        \"Variables of class `\",\n        class(x)[1],\n        \"` can't be recoded and remain unchanged.\"\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname categorize\n#' @export\ncategorize.numeric <- function(\n  x,\n  split = \"median\",\n  n_groups = NULL,\n  range = NULL,\n  lowest = 1,\n  breaks = \"exclusive\",\n  labels = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # sanity check\n  split <- .sanitize_split_arg(split, n_groups, range)\n\n  # handle aliases\n  if (identical(split, \"equal_length\")) {\n    split <- \"length\"\n  }\n  if (identical(split, \"equal_range\")) {\n    split <- \"range\"\n  }\n\n  # check for valid values\n  breaks <- match.arg(breaks, c(\"exclusive\", \"inclusive\"))\n\n  # save\n  original_x <- x\n\n  # no missings\n  x <- stats::na.omit(x)\n\n  # stop if all NA\n  if (!length(x)) {\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        \"Variable contains only missing values. 
No recoding carried out.\"\n      )\n    }\n    return(original_x)\n  }\n\n  if (is.numeric(split)) {\n    category_splits <- split\n  } else {\n    category_splits <- switch(\n      split,\n      median = stats::median(x),\n      mean = mean(x),\n      length = n_groups,\n      quantile = stats::quantile(x, probs = seq_len(n_groups) / n_groups),\n      range = .equal_range(x, range, n_groups, lowest),\n      NULL\n    )\n  }\n\n  # complete ranges, including minimum and maximum\n  if (!identical(split, \"length\")) {\n    category_splits <- unique(c(min(x), category_splits, max(x)))\n  }\n\n  # recode into groups\n  out <- droplevels(cut(\n    x,\n    breaks = category_splits,\n    include.lowest = TRUE,\n    right = identical(breaks, \"inclusive\")\n  ))\n  cut_result <- out\n  levels(out) <- 1:nlevels(out)\n\n  # fix lowest value, add back into original vector\n  out <- as.numeric(out)\n  if (!is.null(lowest)) {\n    out <- out - (min(out) - lowest)\n  }\n  original_x[!is.na(original_x)] <- out\n\n  # turn into factor?\n  .original_x_to_factor(original_x, x, cut_result, labels, out, verbose, ...)\n}\n\n\n#' @export\ncategorize.factor <- function(x, ...) 
{\n  original_x <- x\n  levels(x) <- 1:nlevels(x)\n  out <- as.factor(categorize(as.numeric(x), ...))\n  .set_back_labels(out, original_x, include_values = FALSE)\n}\n\n\n#' @rdname categorize\n#' @export\ncategorize.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  split = \"median\",\n  n_groups = NULL,\n  range = NULL,\n  lowest = 1,\n  breaks = \"exclusive\",\n  labels = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\"\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x[select] <- lapply(\n    x[select],\n    categorize,\n    split = split,\n    n_groups = n_groups,\n    range = range,\n    lowest = lowest,\n    breaks = breaks,\n    labels = labels,\n    verbose = verbose,\n    ...\n  )\n  x\n}\n\n\n#' @export\ncategorize.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  split = \"median\",\n  n_groups = NULL,\n  range = NULL,\n  lowest = 1,\n  breaks = \"exclusive\",\n  labels = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  attr_data <- attributes(x)\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = TRUE,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates 
\"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\"\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x <- as.data.frame(x)\n  for (rows in grps) {\n    x[rows, ] <- categorize(\n      x[rows, , drop = FALSE],\n      split = split,\n      n_groups = n_groups,\n      range = range,\n      lowest = lowest,\n      breaks = breaks,\n      labels = labels,\n      select = select,\n      exclude = exclude,\n      append = FALSE, # need to set to FALSE here, else variable will be doubled\n      ignore_case = ignore_case,\n      verbose = verbose,\n      ...\n    )\n  }\n  # set back class, so data frame still works with dplyr\n  x <- .replace_attrs(x, attr_data)\n  x\n}\n\n\n# tools --------------------\n\n.equal_range <- function(x, range, n_groups, lowest = NULL) {\n  if (is.null(lowest)) {\n    lowest <- 1\n  }\n  if (is.null(range)) {\n    size <- ceiling((max(x) - min(x)) / n_groups)\n    range <- as.numeric(size)\n  }\n  seq(lowest, max(x), by = range)\n}\n\n\n.sanitize_split_arg <- function(split, n_groups, range) {\n  # check arguments\n  if (is.character(split)) {\n    split <- match.arg(\n      split,\n      choices = c(\n        \"median\",\n        \"mean\",\n        \"quantile\",\n        \"equal_length\",\n        \"equal_range\",\n        \"equal\",\n        \"equal_distance\",\n        \"range\",\n        \"distance\"\n      )\n    )\n  }\n\n  if (\n    is.character(split) &&\n      split %in% c(\"quantile\", \"equal_length\") &&\n      is.null(n_groups)\n  ) {\n    insight::format_error(\n      \"Recoding based on quantiles or equal-sized groups requires the `n_groups` argument to be specified.\"\n    )\n  }\n\n  if (\n    is.character(split) &&\n      split == \"equal_range\" &&\n      is.null(n_groups) &&\n      is.null(range)\n  ) {\n    
insight::format_error(\n      \"Recoding into groups with equal range requires either the `range` or `n_groups` argument to be specified.\"\n    )\n  }\n\n  split\n}\n\n\n.original_x_to_factor <- function(\n  original_x,\n  x,\n  cut_result,\n  labels,\n  out,\n  verbose,\n  ...\n) {\n  if (!is.null(labels)) {\n    if (length(labels) == length(unique(out))) {\n      original_x <- as.factor(original_x)\n      levels(original_x) <- labels\n    } else if (\n      length(labels) == 1 &&\n        labels %in% c(\"mean\", \"median\", \"range\", \"observed\")\n    ) {\n      original_x <- as.factor(original_x)\n      no_na_x <- original_x[!is.na(original_x)]\n      out <- switch(\n        labels,\n        mean = stats::aggregate(x, list(no_na_x), FUN = mean, na.rm = TRUE)$x,\n        median = stats::aggregate(\n          x,\n          list(no_na_x),\n          FUN = stats::median,\n          na.rm = TRUE\n        )$x,\n        # labels basically like what \"cut()\" returns\n        range = levels(cut_result),\n        # range based on the values that are actually present in the data\n        {\n          temp <- stats::aggregate(\n            x,\n            list(no_na_x),\n            FUN = range,\n            na.rm = TRUE\n          )$x\n          apply(temp, 1, function(i) {\n            paste0(\"(\", paste(as.vector(i), collapse = \"-\"), \")\")\n          })\n        }\n      )\n      levels(original_x) <- insight::format_value(out, ...)\n    } else if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"Argument `labels` and levels of the recoded variable are not of the same length.\",\n        \"Variable will not be converted to factor.\"\n      )\n    }\n  }\n  original_x\n}\n"
  },
  {
    "path": "R/center.R",
    "content": "#' Centering (Grand-Mean Centering)\n#'\n#' Performs a grand-mean centering of data.\n#'\n#' @param x A (grouped) data frame, a (numeric or character) vector or a factor.\n#' @param force Logical, if `TRUE`, forces centering of factors as\n#'   well. Factors are converted to numerical values, with the lowest level\n#'   being the value `1` (unless the factor has numeric levels, which are\n#'   converted to the corresponding numeric value).\n#' @param robust Logical, if `TRUE`, centering is done by subtracting the\n#'   median from the variables. If `FALSE`, variables are centered by\n#'   subtracting the mean.\n#' @param append Logical or string. If `TRUE`, centered variables get new\n#'   column names (with the suffix `\"_c\"`) and are appended (column bind) to `x`,\n#'   thus returning both the original and the centered variables. If `FALSE`,\n#'   original variables in `x` will be overwritten by their centered versions.\n#'   If a character value, centered variables are appended with new column\n#'   names (using the defined suffix) to the original data frame.\n#' @param verbose Toggle warnings and messages.\n#' @param weights Can be `NULL` (for no weighting), or:\n#'   - For data frames: a numeric vector of weights, or a character of the\n#'   name of a column in the `data.frame` that contains the weights.\n#'   - For numeric vectors: a numeric vector of weights.\n#' @param center Numeric value, which can be used as alternative to\n#'   `reference` to define a reference centrality. If `center` is of length 1,\n#'   it will be recycled to match the length of selected variables for centering.\n#'   Else, `center` must be of same length as the number of selected variables.\n#'   Values in `center` will be matched to selected variables in the provided\n#'   order, unless a named vector is given. In this case, names are matched\n#'   against the names of the selected variables.\n#' @param ... 
Currently not used.\n#' @inheritParams extract_column_names\n#' @inheritParams standardize\n#'\n#' @section Selection of variables - the `select` argument:\n#' For most functions that have a `select` argument (including this function),\n#' the complete input data frame is returned, even when `select` only selects\n#' a range of variables. That is, the function is only applied to those variables\n#' that have a match in `select`, while all other variables remain unchanged.\n#' In other words: for this function, `select` will not omit any non-included\n#' variables, so that the returned data frame will include all variables\n#' from the input data frame.\n#'\n#' @note\n#' **Difference between centering and standardizing**: Standardized variables\n#' are computed by subtracting the mean of the variable and then dividing it by\n#' the standard deviation, while centering variables involves only the\n#' subtraction.\n#'\n#' @seealso If centering within-clusters (instead of grand-mean centering)\n#'   is required, see [demean()]. For standardizing, see [standardize()], and\n#'   [makepredictcall.dw_transformer()] for use in model formulas.\n#'\n#' @return The centered variables.\n#'\n#' @examples\n#' data(iris)\n#'\n#' # entire data frame or a vector\n#' head(iris$Sepal.Width)\n#' head(center(iris$Sepal.Width))\n#' head(center(iris))\n#' head(center(iris, force = TRUE))\n#'\n#' # only the selected columns from a data frame\n#' center(anscombe, select = c(\"x1\", \"x3\"))\n#' center(anscombe, exclude = c(\"x1\", \"x3\"))\n#'\n#' # centering with reference center and scale\n#' d <- data.frame(\n#'   a = c(-2, -1, 0, 1, 2),\n#'   b = c(3, 4, 5, 6, 7)\n#' )\n#'\n#' # default centering at mean\n#' center(d)\n#'\n#' # centering, using 0 as mean\n#' center(d, center = 0)\n#'\n#' # centering, using -5 as mean\n#' center(d, center = -5)\n#' @export\ncenter <- function(x, ...) 
{\n  UseMethod(\"center\")\n}\n\n#' @rdname center\n#' @export\ncentre <- center\n\n\n#' @export\ncenter.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Centering currently not possible for variables of class `%s`.\",\n        class(x)[1]\n      ),\n      \"You may open an issue at https://github.com/easystats/datawizard/issues.\"\n    )\n  }\n  x\n}\n\n\n#' @rdname center\n#' @export\ncenter.numeric <- function(\n  x,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # set default. Furthermore, data.frame methods cannot return a vector\n  # of NULLs for each variable - instead they return NA. Thus, we have to\n  # treat NA like NULL\n  if (is.null(center) || is.na(center)) {\n    center <- TRUE\n  }\n\n  my_args <- .process_std_center(\n    x,\n    weights,\n    robust,\n    verbose,\n    reference,\n    center,\n    scale = NULL\n  )\n  dot_args <- list(...)\n\n  if (is.null(my_args)) {\n    # all NA?\n    return(x)\n  } else if (is.null(my_args$check)) {\n    vals <- rep(0, length(my_args$vals)) # If only unique value\n  } else {\n    vals <- as.vector(my_args$vals - my_args$center)\n  }\n\n  centered_x <- rep(NA, length(my_args$valid_x))\n  centered_x[my_args$valid_x] <- vals\n  attr(centered_x, \"center\") <- my_args$center\n  attr(centered_x, \"scale\") <- 1\n  attr(centered_x, \"robust\") <- robust\n  # labels\n  z <- .set_back_labels(centered_x, x, include_values = FALSE)\n  # don't add attribute when we call data frame methods\n  if (!isFALSE(dot_args$add_transform_class)) {\n    class(z) <- c(\"dw_transformer\", class(z))\n  }\n  z\n}\n\n\n#' @export\ncenter.factor <- function(\n  x,\n  robust = FALSE,\n  weights = NULL,\n  force = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  if (!force) {\n    return(x)\n  }\n  center(\n    .factor_to_numeric(x),\n    weights = weights,\n    robust = robust,\n    verbose = verbose,\n    
...\n  )\n}\n\n#' @export\ncenter.logical <- center.factor\n\n#' @export\ncenter.character <- center.factor\n\n#' @export\ncenter.Date <- center.factor\n\n#' @export\ncenter.AsIs <- center.numeric\n\n#' @rdname center\n#' @inheritParams standardize.data.frame\n#' @export\ncenter.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  force = FALSE,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  append = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # process arguments\n  my_args <- .process_std_args(\n    x,\n    select,\n    exclude,\n    weights,\n    append,\n    append_suffix = \"_c\",\n    keep_factors = force,\n    remove_na,\n    reference,\n    .center = center,\n    .scale = NULL\n  )\n\n  # set new values\n  x <- my_args$x\n\n  for (var in my_args$select) {\n    x[[var]] <- center(\n      x[[var]],\n      robust = robust,\n      weights = my_args$weights,\n      verbose = FALSE,\n      reference = reference[[var]],\n      center = my_args$center[var],\n      force = force,\n      add_transform_class = FALSE\n    )\n  }\n\n  attr(x, \"center\") <- vapply(\n    x[my_args$select],\n    function(z) attributes(z)$center,\n    numeric(1)\n  )\n  attr(x, \"scale\") <- vapply(\n    x[my_args$select],\n    function(z) attributes(z)$scale,\n    numeric(1)\n  )\n  attr(x, \"robust\") <- robust\n  x\n}\n\n\n#' @export\ncenter.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  force = FALSE,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  append = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n) {\n  # evaluate select/exclude, 
may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  my_args <- .process_grouped_df(\n    x,\n    select,\n    exclude,\n    append,\n    append_suffix = \"_c\",\n    reference,\n    weights,\n    keep_factors = force\n  )\n\n  for (rows in my_args$grps) {\n    my_args$x[rows, ] <- center(\n      my_args$x[rows, , drop = FALSE],\n      select = my_args$select,\n      exclude = NULL,\n      robust = robust,\n      weights = my_args$weights,\n      remove_na = remove_na,\n      verbose = verbose,\n      force = force,\n      append = FALSE,\n      center = center,\n      add_transform_class = FALSE,\n      ...\n    )\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(my_args$x) <- my_args$info\n  my_args$x\n}\n\n\n# methods -------------------------\n\n#' @export\nprint.dw_transformer <- function(x, ...) {\n  print(as.vector(x), ...)\n  vector_info <- NULL\n  if (!is.null(attributes(x)$scale)) {\n    # attributes for center() / standardize()\n    vector_info <- sprintf(\n      \"(center: %.2g, scale = %.2g)\\n\",\n      attributes(x)$center,\n      attributes(x)$scale\n    )\n  } else if (!is.null(attributes(x)$range_difference)) {\n    # attributes for normalize() / rescale()\n    vector_info <- sprintf(\n      \"(original range = %.2g to %.2g)\\n\",\n      attributes(x)$min_value,\n      attributes(x)$min_value + attributes(x)$range_difference\n    )\n  }\n  if (!is.null(vector_info)) {\n    insight::print_color(vector_info, color = \"grey\")\n  }\n  invisible(x)\n}\n"
  },
  {
    "path": "R/contrs.R",
    "content": "#' Deviation Contrast Matrix\n#'\n#' Build a deviation contrast matrix, a type of _effects contrast_ matrix.\n#'\n#' @inheritParams stats::contr.sum\n#'\n#' @details\n#' In effects coding, unlike treatment/dummy coding\n#' ([stats::contr.treatment()]), each contrast sums to 0. In regressions models,\n#' this results in an intercept that represents the (unweighted) average of the\n#' group means. In ANOVA settings, this also guarantees that lower order effects\n#' represent _main_ effects (and not _simple_ or _conditional_ effects, as is\n#' the case when using R's default [stats::contr.treatment()]).\n#' \\cr\\cr\n#' Deviation coding (`contr.deviation`) is a type of effects coding. With\n#' deviation coding, the coefficients for factor variables are interpreted as\n#' the difference of each factor level from the base level (this is the same\n#' interpretation as with treatment/dummy coding). For example, for a factor\n#' `group` with levels \"A\", \"B\", and \"C\", with `contr.devation`, the intercept\n#' represents the overall mean (average of the group means for the 3 groups),\n#' and the coefficients `groupB` and `groupC` represent the differences between\n#' the A group mean and the B and C group means, respectively.\n#' \\cr\\cr\n#' Sum coding ([stats::contr.sum()]) is another type of effects coding. With sum\n#' coding, the coefficients for factor variables are interpreted as the\n#' difference of each factor level from **the grand (across-groups) mean**. 
For\n#' example, for a factor `group` with levels \"A\", \"B\", and \"C\", with\n#' `contr.sum`, the intercept represents the overall mean (average of the group\n#' means for the 3 groups), and the coefficients `group1` and `group2` represent\n#' the differences the\n#' **A** and **B** group means from the overall mean, respectively.\n#'\n#' @seealso [stats::contr.sum()]\n#'\n#' @examplesIf !identical(Sys.getenv(\"IN_PKGDOWN\"), \"true\")\n#' \\donttest{\n#' data(\"mtcars\")\n#'\n#' mtcars <- data_modify(mtcars, cyl = factor(cyl))\n#'\n#' c.treatment <- cbind(Intercept = 1, contrasts(mtcars$cyl))\n#' solve(c.treatment)\n#' #>            4 6 8\n#' #> Intercept  1 0 0  # mean of the 1st level\n#' #> 6         -1 1 0  # 2nd level - 1st level\n#' #> 8         -1 0 1  # 3rd level - 1st level\n#'\n#' contrasts(mtcars$cyl) <- contr.sum\n#' c.sum <- cbind(Intercept = 1, contrasts(mtcars$cyl))\n#' solve(c.sum)\n#' #>                4      6      8\n#' #> Intercept  0.333  0.333  0.333   # overall mean\n#' #>            0.667 -0.333 -0.333   # deviation of 1st from overall mean\n#' #>           -0.333  0.667 -0.333   # deviation of 2nd from overall mean\n#'\n#'\n#' contrasts(mtcars$cyl) <- contr.deviation\n#' c.deviation <- cbind(Intercept = 1, contrasts(mtcars$cyl))\n#' solve(c.deviation)\n#' #>                4     6     8\n#' #> Intercept  0.333 0.333 0.333   # overall mean\n#' #> 6         -1.000 1.000 0.000   # 2nd level - 1st level\n#' #> 8         -1.000 0.000 1.000   # 3rd level - 1st level\n#'\n#' ## With Interactions -----------------------------------------\n#' mtcars <- data_modify(mtcars, am = C(am, contr = contr.deviation))\n#' mtcars <- data_arrange(mtcars, select = c(\"cyl\", \"am\"))\n#'\n#' mm <- unique(model.matrix(~ cyl * am, data = mtcars))\n#' rownames(mm) <- c(\n#'   \"cyl4.am0\", \"cyl4.am1\", \"cyl6.am0\",\n#'   \"cyl6.am1\", \"cyl8.am0\", \"cyl8.am1\"\n#' )\n#'\n#' solve(mm)\n#' #>             cyl4.am0 cyl4.am1 cyl6.am0 cyl6.am1 cyl8.am0 
cyl8.am1\n#' #> (Intercept)    0.167    0.167    0.167    0.167    0.167    0.167  # overall mean\n#' #> cyl6          -0.500   -0.500    0.500    0.500    0.000    0.000  # cyl MAIN eff: 2nd - 1st\n#' #> cyl8          -0.500   -0.500    0.000    0.000    0.500    0.500  # cyl MAIN eff: 2nd - 1st\n#' #> am1           -0.333    0.333   -0.333    0.333   -0.333    0.333  # am MAIN eff\n#' #> cyl6:am1       1.000   -1.000   -1.000    1.000    0.000    0.000\n#' #> cyl8:am1       1.000   -1.000    0.000    0.000   -1.000    1.000\n#' }\n#'\n#' @export\ncontr.deviation <- function(n, base = 1, contrasts = TRUE, sparse = FALSE) {\n  cont <- stats::contr.treatment(\n    n,\n    base = base,\n    contrasts = contrasts,\n    sparse = sparse\n  )\n  if (contrasts) {\n    n <- nrow(cont)\n    cont <- cont - 1 / n\n  }\n  cont\n}\n"
  },
  {
    "path": "R/convert_na_to.R",
    "content": "#' @title Replace missing values in a variable or a data frame.\n#' @name convert_na_to\n#'\n#' @description\n#' Replace missing values in a variable or a data frame.\n#'\n#' @param x A numeric, factor, or character vector, or a data frame.\n#' @param replacement Numeric or character value that will be used to\n#' replace `NA`.\n#' @param verbose Toggle warnings.\n#' @param ... Not used.\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @return\n#' `x`, where `NA` values are replaced by `replacement`.\n#'\n#' @examples\n#' # Convert NA to 0 in a numeric vector\n#' convert_na_to(\n#'   c(9, 3, NA, 2, 3, 1, NA, 8),\n#'   replacement = 0\n#' )\n#'\n#' # Convert NA to \"missing\" in a character vector\n#' convert_na_to(\n#'   c(\"a\", NA, \"d\", \"z\", NA, \"t\"),\n#'   replacement = \"missing\"\n#' )\n#'\n#' ### For data frames\n#'\n#' test_df <- data.frame(\n#'   x = c(1, 2, NA),\n#'   x2 = c(4, 5, NA),\n#'   y = c(\"a\", \"b\", NA)\n#' )\n#'\n#' # Convert all NA to 0 in numeric variables, and all NA to \"missing\" in\n#' # character variables\n#' convert_na_to(\n#'   test_df,\n#'   replace_num = 0,\n#'   replace_char = \"missing\"\n#' )\n#'\n#' # Convert a specific variable in the data frame\n#' convert_na_to(\n#'   test_df,\n#'   replace_num = 0,\n#'   replace_char = \"missing\",\n#'   select = \"x\"\n#' )\n#'\n#' # Convert all variables starting with \"x\"\n#' convert_na_to(\n#'   test_df,\n#'   replace_num = 0,\n#'   replace_char = \"missing\",\n#'   select = starts_with(\"x\")\n#' )\n#'\n#' # Convert NA to 1 in variable 'x2' and to 0 in all other numeric\n#' # variables\n#' convert_na_to(\n#'   test_df,\n#'   replace_num = 0,\n#'   select = list(x2 = 1)\n#' )\n#'\n#' @export\n\nconvert_na_to <- function(x, ...) {\n  UseMethod(\"convert_na_to\")\n}\n\n\n#' @export\nconvert_na_to.default <- function(x, verbose = TRUE, ...) 
{\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Converting missing values (`NA`) into regular values currently not possible for variables of class `%s`.\",\n        class(x)[1]\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname convert_na_to\n#' @export\nconvert_na_to.numeric <- function(x, replacement = NULL, verbose = TRUE, ...) {\n  if (insight::is_empty_object(replacement) || !is.numeric(replacement)) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\"`replacement` needs to be a numeric vector.\")\n    }\n  } else if (length(replacement) > 1) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\"`replacement` needs to be of length one.\")\n    }\n  } else {\n    x[is.na(x)] <- replacement\n  }\n  x\n}\n\n\n#' @export\nconvert_na_to.factor <- function(x, replacement = NULL, verbose = TRUE, ...) {\n  if (insight::is_empty_object(replacement) || length(replacement) > 1) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\"`replacement` needs to be of length one.\")\n    }\n  } else {\n    x <- addNA(x)\n    levels(x) <- c(levels(x), replacement)\n    x[is.na(x)] <- replacement\n  }\n  x\n}\n\n\n#' @rdname convert_na_to\n#' @export\nconvert_na_to.character <- function(\n  x,\n  replacement = NULL,\n  verbose = TRUE,\n  ...\n) {\n  if (\n    insight::is_empty_object(replacement) ||\n      !is.character(replacement) && !is.numeric(replacement)\n  ) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"`replacement` needs to be a character or numeric vector.\"\n      )\n    }\n  } else if (length(replacement) > 1) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\"`replacement` needs to be of length one.\")\n    }\n  } else {\n    x[is.na(x)] <- as.character(replacement)\n  }\n  x\n}\n\n\n#' @param replace_num Value to replace `NA` when variable is of type numeric.\n#' @param replace_char Value to replace `NA` when variable is of type character.\n#' @param replace_fac Value to 
replace `NA` when variable is of type factor.\n#' @inheritParams extract_column_names\n#'\n#' @rdname convert_na_to\n#' @export\nconvert_na_to.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  replacement = NULL,\n  replace_num = replacement,\n  replace_char = replacement,\n  replace_fac = replacement,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  my_data <- x\n  select_nse <- .select_nse(\n    select,\n    data = my_data,\n    exclude = exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # default\n  lookup <- lapply(x, function(y) {\n    if (is.numeric(y)) {\n      replace_num\n    } else if (is.character(y)) {\n      replace_char\n    } else if (is.factor(y)) {\n      replace_fac\n    }\n  })\n\n  # override for specific vars\n  try_eval <- try(eval(select), silent = TRUE)\n  select_is_list <- !inherits(try_eval, \"try-error\") && is.list(select)\n\n  if (select_is_list) {\n    for (i in select_nse) {\n      lookup[[i]] <- select[[i]]\n    }\n  } else {\n    lookup <- lookup[names(lookup) %in% select_nse]\n  }\n\n  lookup <- Filter(Negate(is.null), lookup)\n\n  for (i in names(lookup)) {\n    x[[i]] <- convert_na_to(\n      x[[i]],\n      replacement = lookup[[i]],\n      verbose = verbose\n    )\n  }\n\n  x\n}\n"
  },
  {
    "path": "R/convert_to_na.R",
    "content": "#' @title Convert non-missing values in a variable into missing values.\n#' @name convert_to_na\n#'\n#' @description\n#' Convert non-missing values in a variable into missing values.\n#'\n#' @param x A vector, factor or a data frame.\n#' @param na Numeric, character vector or logical (or a list of numeric, character\n#'   vectors or logicals) with values that should be converted to `NA`. Numeric\n#'   values applied to numeric vectors, character values are used for factors,\n#'   character vectors or date variables, and logical values for logical vectors.\n#' @param drop_levels Logical, for factors, when specific levels are replaced\n#'   by `NA`, should unused levels be dropped?\n#' @param ... Not used.\n#' @inheritParams extract_column_names\n#'\n#' @return\n#' `x`, where all values in `na` are converted to `NA`.\n#'\n#' @examples\n#' x <- sample(1:6, size = 30, replace = TRUE)\n#' x\n#' # values 4 and 5 to NA\n#' convert_to_na(x, na = 4:5)\n#'\n#' # data frames\n#' set.seed(123)\n#' x <- data.frame(\n#'   a = sample(1:6, size = 20, replace = TRUE),\n#'   b = sample(letters[1:6], size = 20, replace = TRUE),\n#'   c = sample(c(30:33, 99), size = 20, replace = TRUE)\n#' )\n#' # for all numerics, convert 5 to NA. Character/factor will be ignored.\n#' convert_to_na(x, na = 5)\n#'\n#' # for numerics, 5 to NA, for character/factor, \"f\" to NA\n#' convert_to_na(x, na = list(6, \"f\"))\n#'\n#' # select specific variables\n#' convert_to_na(x, select = c(\"a\", \"b\"), na = list(6, \"f\"))\n#' @export\nconvert_to_na <- function(x, ...) {\n  UseMethod(\"convert_to_na\")\n}\n\n\n#' @export\nconvert_to_na.default <- function(x, verbose = TRUE, ...) 
{\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Converting values into missing values (`NA`) currently not possible for variables of class `%s`.\",\n        class(x)[1]\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname convert_to_na\n#' @export\nconvert_to_na.numeric <- function(x, na = NULL, verbose = TRUE, ...) {\n  # if we have a list, use first valid element\n  if (is.list(na)) {\n    na <- unlist(\n      na[vapply(na, is.numeric, FUN.VALUE = TRUE)],\n      use.names = FALSE\n    )\n  }\n\n  if (insight::is_empty_object(na) || !is.numeric(na)) {\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        \"Could not convert values into `NA` for a numeric variable.\",\n        \"To do this, `na` needs to be a numeric vector, or a list that contains numeric vector elements.\"\n      )\n    }\n  } else {\n    matches <- which(x %in% na)\n    x[matches] <- NA\n    # drop unused labels\n    value_labels <- attr(x, \"labels\", exact = TRUE)\n    attr(x, \"labels\") <- value_labels[!value_labels %in% na]\n  }\n  x\n}\n\n\n#' @rdname convert_to_na\n#' @export\nconvert_to_na.factor <- function(\n  x,\n  na = NULL,\n  drop_levels = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # if we have a list, use first valid element\n  if (is.list(na)) {\n    na <- unlist(\n      na[vapply(na, is.character, FUN.VALUE = TRUE)],\n      use.names = FALSE\n    )\n  }\n\n  if (insight::is_empty_object(na) || (!is.factor(na) && !is.character(na))) {\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        \"Could not convert values into `NA` for a factor or character variable.\",\n        \"To do this, `na` needs to be a character vector, or a list that contains character vector elements.\"\n      )\n    }\n  } else {\n    matches <- which(x %in% na)\n    x[matches] <- NA\n    # drop unused labels\n    value_labels <- attr(x, \"labels\", exact = TRUE)\n    if (is.factor(x) && isTRUE(drop_levels)) {\n      # save label attribute\n      
variable_label <- attr(x, \"label\", exact = TRUE)\n      x <- droplevels(x)\n      # droplevels() discards attributes, so we need to re-assign them\n      attr(x, \"label\") <- variable_label\n    }\n    attr(x, \"labels\") <- value_labels[!value_labels %in% na]\n  }\n  x\n}\n\n\n#' @export\nconvert_to_na.character <- convert_to_na.factor\n\n\n#' @export\nconvert_to_na.Date <- function(x, na = NULL, verbose = TRUE, ...) {\n  # if we have a list, use first valid element\n  if (is.list(na)) {\n    na <- na[vapply(na, .is_date, FUN.VALUE = logical(1L))]\n    if (length(na) > 1) {\n      na <- na[[1]]\n    }\n  }\n\n  if (insight::is_empty_object(na) || !.is_date(na)) {\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        \"Could not convert values into `NA` for a date/time variable.\",\n        \"To do this, `na` must be of class 'Date'.\"\n      )\n    }\n  } else {\n    matches <- which(x == na)\n    x[matches] <- NA\n  }\n  x\n}\n\n\n#' @export\nconvert_to_na.logical <- function(x, na = NULL, verbose = TRUE, ...) 
{\n  # if we have a list, use first valid element\n  if (is.list(na)) {\n    na <- unlist(\n      na[vapply(na, is.logical, FUN.VALUE = TRUE)],\n      use.names = FALSE\n    )\n  }\n\n  if (insight::is_empty_object(na) || !is.logical(na)) {\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        \"Could not convert values into `NA` for a logical variable.\",\n        \"To do this, `na` needs to be a logical vector, or a list that contains logical vector elements.\"\n      )\n    }\n  } else {\n    matches <- which(x == na)\n    x[matches] <- NA\n  }\n  x\n}\n\n\n#' @rdname convert_to_na\n#' @export\nconvert_to_na.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  na = NULL,\n  drop_levels = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  x[select] <- lapply(\n    x[select],\n    convert_to_na,\n    na = na,\n    drop_levels = drop_levels,\n    verbose = verbose,\n    ...\n  )\n\n  x\n}\n"
  },
  {
    "path": "R/data.R",
    "content": "#' @docType data\n#' @title Sample dataset from the National Health and Nutrition Examination Survey\n#' @name nhanes_sample\n#' @keywords data\n#'\n#' @description Selected variables from the National Health and Nutrition Examination\n#'              Survey that are used in the example from Lumley (2010), Appendix E.\n#'\n#' @references Lumley T (2010). Complex Surveys: a guide to analysis using R. Wiley\nNULL\n\n\n#' @docType data\n#' @title Sample dataset from the EFC Survey\n#' @name efc\n#' @keywords data\n#'\n#' @description Selected variables from the EUROFAMCARE survey. Useful when\n#' testing on \"real-life\" data sets, including random missing values. This\n#' data set also has value and variable label attributes.\nNULL\n"
  },
  {
    "path": "R/data_addprefix.R",
    "content": "#' Add a prefix or suffix to column names\n#'\n#' @rdname data_prefix_suffix\n#' @inheritParams extract_column_names\n#' @param pattern A character string, which will be added as prefix or suffix\n#' to the column names.\n#' @param ... Other arguments passed to or from other functions.\n#'\n#' @seealso\n#' [data_rename()] for more fine-grained column renaming.\n#' @examples\n#' # Add prefix / suffix to all columns\n#' head(data_addprefix(iris, \"NEW_\"))\n#' head(data_addsuffix(iris, \"_OLD\"))\n#'\n#' @export\ndata_addprefix <- function(\n  data,\n  pattern,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  selected_columns <- colnames(data) %in% select\n  colnames(data)[selected_columns] <- paste0(\n    pattern,\n    colnames(data)[selected_columns]\n  )\n  data\n}\n\n\n#' @rdname data_prefix_suffix\n#' @export\ndata_addsuffix <- function(\n  data,\n  pattern,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  selected_columns <- colnames(data) %in% select\n  colnames(data)[selected_columns] <- paste0(\n    colnames(data)[selected_columns],\n    pattern\n  )\n  data\n}\n"
  },
  {
    "path": "R/data_arrange.R",
    "content": "#' Arrange rows by column values\n#'\n#' `data_arrange()` orders the rows of a data frame by the values of selected\n#' columns.\n#'\n#' @param data A data frame, or an object that can be coerced to a data frame.\n#' @param select Character vector of column names. Use a dash just before column\n#'   name to arrange in decreasing order, for example `\"-x1\"`.\n#' @param safe Do not throw an error if one of the variables specified doesn't\n#'   exist.\n#'\n#' @return A data frame.\n#'\n#' @examples\n#'\n#' # Arrange using several variables\n#' data_arrange(head(mtcars), c(\"gear\", \"carb\"))\n#'\n#' # Arrange in decreasing order\n#' data_arrange(head(mtcars), \"-carb\")\n#'\n#' # Throw an error if one of the variables specified doesn't exist\n#' try(data_arrange(head(mtcars), c(\"gear\", \"foo\"), safe = FALSE))\n#' @export\ndata_arrange <- function(data, select = NULL, safe = TRUE) {\n  UseMethod(\"data_arrange\")\n}\n\n\n#' @export\ndata_arrange.default <- function(data, select = NULL, safe = TRUE) {\n  if (is.null(select) || length(select) == 0) {\n    return(data)\n  }\n\n  original_x <- data\n\n  # Input validation check\n  data <- .coerce_to_dataframe(data)\n\n  # Remove tidyverse attributes, will add them back at the end\n  if (inherits(original_x, \"tbl_df\")) {\n    tbl_input <- TRUE\n    data <- as.data.frame(data, stringsAsFactors = FALSE)\n  } else {\n    tbl_input <- FALSE\n  }\n\n  # find which vars should be decreasing\n  desc <- select[startsWith(select, \"-\")]\n  desc <- gsub(\"^-\", \"\", desc)\n  select <- gsub(\"^-\", \"\", select)\n\n  # check for variables that are not in data\n  dont_exist <- setdiff(select, colnames(data))\n\n  if (length(dont_exist) > 0) {\n    if (safe) {\n      insight::format_warning(\n        paste0(\n          \"The following column(s) don't exist in the dataset: \",\n          text_concatenate(dont_exist),\n          \".\"\n        ),\n        .misspelled_string(names(data), dont_exist, \"Possibly 
misspelled?\")\n      )\n    } else {\n      insight::format_error(\n        paste0(\n          \"The following column(s) don't exist in the dataset: \",\n          text_concatenate(dont_exist),\n          \".\"\n        ),\n        .misspelled_string(names(data), dont_exist, \"Possibly misspelled?\")\n      )\n    }\n    select <- select[-which(select %in% dont_exist)]\n  }\n\n  if (length(select) == 0) {\n    return(data)\n  }\n\n  already_sorted <- all(vapply(\n    data[, select, drop = FALSE],\n    .is_sorted,\n    logical(1L)\n  ))\n\n  if (isTRUE(already_sorted)) {\n    return(data)\n  }\n\n  out <- data\n\n  # reverse order for variables that should be decreasing\n  if (length(desc) > 0) {\n    for (i in desc) {\n      out[[i]] <- -xtfrm(out[[i]])\n    }\n  }\n\n  # apply ordering\n  if (length(select) == 1) {\n    out <- data[order(out[[select]]), , drop = FALSE]\n  } else {\n    out <- data[do.call(order, out[, select]), , drop = FALSE]\n  }\n\n  if (!insight::object_has_rownames(data)) {\n    rownames(out) <- NULL\n  }\n\n  # add back custom attributes\n  out <- .replace_attrs(out, attributes(original_x))\n\n  out\n}\n\n\n#' @export\ndata_arrange.grouped_df <- function(data, select = NULL, safe = TRUE) {\n  original_x <- data\n  grps <- attr(data, \"groups\", exact = TRUE)\n  grps <- grps[[\".rows\"]]\n\n  # Remove tidyverse attributes, will add them back at the end\n  if (inherits(data, \"tbl_df\")) {\n    tbl_input <- TRUE\n    data <- as.data.frame(data, stringsAsFactors = FALSE)\n  } else {\n    tbl_input <- FALSE\n  }\n\n  out <- lapply(grps, function(x) {\n    data_arrange.default(data[x, ], select = select, safe = safe)\n  })\n\n  out <- do.call(rbind, out)\n\n  if (!insight::object_has_rownames(data)) {\n    rownames(out) <- NULL\n  }\n\n  # add back tidyverse attributes\n  if (isTRUE(tbl_input)) {\n    class(out) <- c(\"tbl_df\", \"tbl\", \"data.frame\")\n  }\n\n  # add back custom attributes\n  out <- .replace_attrs(out, 
attributes(original_x))\n\n  out\n}\n"
  },
  {
    "path": "R/data_codebook.R",
    "content": "#' Generate a codebook of a data frame.\n#'\n#' `data_codebook()` generates codebooks from data frames, i.e. overviews\n#' of all variables and some more information about each variable (like\n#' labels, values or value range, frequencies, amount of missing values).\n#'\n#' @param data A data frame, or an object that can be coerced to a data frame.\n#' @param variable_label_width Length of variable labels. Longer labels will be\n#' wrapped at `variable_label_width` chars. If `NULL`, longer labels will not\n#' be split into multiple lines. Only applies to _labelled data_.\n#' @param value_label_width Length of value labels. Longer labels will be\n#' shortened, where the remaining part is truncated. Only applies to\n#' _labelled data_ or factor levels.\n#' @param range_at Indicates how many unique values in a numeric vector are\n#' needed in order to print a range for that variable instead of a frequency\n#' table for all numeric values. Can be useful if the data contains numeric\n#' variables with only a few unique values and where full frequency tables\n#' instead of value ranges should be displayed.\n#' @param max_values Number of maximum values that should be displayed. 
Can be\n#' used to avoid too many rows when variables have lots of unique values.\n#' @param font_size For HTML tables, the font size.\n#' @param line_padding For HTML tables, the distance (in pixel) between lines.\n#' @param row_color For HTML tables, the fill color for odd rows.\n#' @inheritParams standardize.data.frame\n#' @inheritParams extract_column_names\n#' @inheritParams data_tabulate\n#'\n#' @return A formatted data frame, summarizing the content of the data frame.\n#' Returned columns include the column index of the variables in the original\n#' data frame (`ID`), column name, variable label (if data is labelled), type\n#' of variable, number of missing values, unique values (or value range),\n#' value labels (for labelled data), and a frequency table (N for each value).\n#' Most columns are formatted as character vectors.\n#'\n#' @note There are methods to `print()` the data frame in a nicer output, as\n#' well as methods for printing in markdown or HTML format (`print_md()` and\n#' `print_html()`). 
The `print()` method for text outputs passes arguments in\n#' `...` to [`insight::export_table()`].\n#'\n#' @examples\n#' data(iris)\n#' data_codebook(iris, select = starts_with(\"Sepal\"))\n#'\n#' data(efc)\n#' data_codebook(efc)\n#'\n#' # shorten labels\n#' data_codebook(efc, variable_label_width = 20, value_label_width = 15)\n#'\n#' # automatic range for numerics at more than 5 unique values\n#' data(mtcars)\n#' data_codebook(mtcars, select = starts_with(\"c\"))\n#'\n#' # force all values to be displayed\n#' data_codebook(mtcars, select = starts_with(\"c\"), range_at = 100)\n#' @export\ndata_codebook <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  variable_label_width = NULL,\n  value_label_width = NULL,\n  max_values = 10,\n  range_at = 6,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  data_name <- insight::safe_deparse(substitute(data))\n\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # check for empty columns, and remove\n  empty <- empty_columns(data[select])\n  if (length(empty)) {\n    if (verbose) {\n      insight::format_warning(\n        sprintf(\n          \"Following %i columns were empty and have been removed:\",\n          length(empty)\n        ),\n        text_concatenate(names(empty))\n      )\n    }\n    select <- select[-empty]\n  }\n\n  # check if any columns left, or found\n  if (!length(select) || is.null(select)) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"No column names that matched the required search pattern were found.\"\n      )\n    }\n    return(NULL)\n  }\n\n  # needed for % NA\n  rows <- nrow(data)\n  max_values <- max_values + 1\n\n  out <- lapply(seq_along(select), function(id) {\n    # variable\n    x <- data[[select[id]]]\n    x_na <- is.na(x)\n    x_inf <- is.infinite(x)\n\n    # initial data frame for codebook\n    d 
<- data.frame(\n      ID = which(colnames(data) == select[id]),\n      Name = select[id],\n      Type = .variable_type(x),\n      Missings = sprintf(\"%g (%.1f%%)\", sum(x_na), 100 * (sum(x_na) / rows)),\n      stringsAsFactors = FALSE,\n      row.names = NULL,\n      check.names = FALSE\n    )\n\n    # check if there are variable labels\n    variable_label <- .extract_variable_labels(x, variable_label_width)\n\n    # we may need to remove duplicated value range elements\n    flag_range <- FALSE\n\n    # save value labels\n    vallab <- attr(x, \"labels\", exact = TRUE)\n\n    # do we have labelled NA values? If so, include labelled NAs in count table\n    # we do this by converting NA values into character strings\n    if (anyNA(vallab) && insight::check_if_installed(\"haven\", quietly = TRUE)) {\n      # get na-tags, i.e. the value labels for the different NA values\n      na_labels <- haven::na_tag(vallab)\n      # replace NA in labels with NA tags\n      vallab[!is.na(na_labels)] <- stats::setNames(\n        paste0(\"NA(\", na_labels[!is.na(na_labels)], \")\"),\n        names(vallab[!is.na(na_labels)])\n      )\n      # replace tagged NAs in variable with their values, tagged as NA(value)\n      na_values <- haven::na_tag(x)\n      # need to convert, we still have haven-class, which cannot coerce\n      x <- as.character(x)\n      x[!is.na(na_values)] <- paste0(\"NA(\", na_values[!is.na(na_values)], \")\")\n      # update information on NA - we still might have non-labelled (regular) NA\n      x_na <- is.na(x)\n    }\n\n    # remove NA and Inf, for tabulate(). as.factor() will convert NaN\n    # to a factor level \"NaN\", which we don't want here (same for Inf),\n    # because tabulate() will then return frequencies for that level, too\n    x <- x[!(x_na | x_inf)]\n\n    # get unique values, to remove non labelled data\n    unique_values <- unique(x)\n\n    # coerce to factor, for tabulate(). 
We will coerce numerics to factor later\n    # which is required because tabulate() doesn't return frequencies for values\n    # lower than 1\n    if (!is.numeric(x) && !is.factor(x)) {\n      x <- as.factor(x)\n    }\n\n    # for ranges, we don't want the N% value, so use this to flag range-values\n    is_range <- FALSE\n\n    # handle labelled data - check if there are value labels or factor levels,\n    # and extract values and N\n    if (!is.null(vallab) && length(vallab)) {\n      # if not all values are labelled, fill in value labels\n      if (!all(unique_values %in% vallab)) {\n        new_vals <- setdiff(unique_values, vallab)\n        vallab <- c(vallab, stats::setNames(new_vals, new_vals))\n      }\n      # if not all value labels are present in the data, remove unused value labels\n      if (!all(vallab %in% unique_values)) {\n        not_needed <- setdiff(vallab, unique_values)\n        # match not needed values in vallab vector - values from labels\n        # may not be in sorted order (e.g. 
 1, 2, 3, -9), or may be character\n        # vectors in case of tagged NAs, so we have to make sure we know which\n        # values can be removed from vallab\n        not_needed <- stats::na.omit(match(not_needed, vallab))\n        vallab <- vallab[-not_needed]\n      }\n      # we now should have the same length of value labels and labelled values\n      # which should also match the number of unique values in the vector.\n      # \"tabulate\" creates frequency tables by sorting by values/levels, so\n      # we need to make sure that labels are also in sorted order.\n      value_labels <- names(vallab)[order(unname(vallab))]\n      values <- sort(unname(vallab))\n      frq <- tabulate(as.factor(x))\n\n      # handle factors\n    } else if (is.factor(x)) {\n      values <- levels(x)\n      value_labels <- NA\n      frq <- tabulate(x)\n\n      # handle numerics\n    } else {\n      value_labels <- NA\n      # only range for too many unique values\n      if (length(unique_values) >= range_at) {\n        r <- range(x, na.rm = TRUE)\n        values <- sprintf(\"[%g, %g]\", round(r[1], 2), round(r[2], 2))\n        frq <- sum(!x_na)\n        flag_range <- length(variable_label) > 1\n        is_range <- TRUE\n        # if we have few values, we can print whole freq. 
table\n      } else {\n        values <- sort(unique_values)\n        frq <- tabulate(as.factor(x))\n      }\n    }\n\n    # tabulate fills 0 for non-existing values, remove those\n    frq <- frq[frq != 0]\n\n    # add Inf values?\n    if (any(x_inf) && length(frq) <= max_values) {\n      values <- c(values, Inf)\n      if (!is.na(value_labels)) {\n        value_labels <- c(value_labels, \"infinite\")\n      }\n      frq <- c(frq, sum(x_inf))\n      # Inf are added as value, so don't flag range any more,\n      # since we now have proportions for the range and the inf values.\n      is_range <- FALSE\n    }\n\n    # add proportions, but not for ranges, since these are always 100%\n    if (is_range) {\n      frq_proportions <- \"\"\n    } else {\n      frq_proportions <- sprintf(\"%.1f%%\", round(100 * (frq / sum(frq)), 1))\n    }\n\n    # make sure we have not too long rows, e.g. for variables that\n    # have dozens of unique values\n    if (length(value_labels) > max_values) {\n      value_labels <- value_labels[1:max_values]\n      value_labels[max_values] <- \"(...)\"\n    }\n    if (length(frq) > max_values) {\n      frq <- frq[1:max_values]\n      frq_proportions <- frq_proportions[1:max_values]\n      frq[max_values] <- NA\n      frq_proportions[max_values] <- NA\n    }\n    if (length(values) > max_values) {\n      values <- values[1:max_values]\n      values[max_values] <- \"(...)\"\n    }\n\n    # make sure length recycling doesn't fail, e.g. if we have split\n    # variable_label into two lines (i.e. vector of length 2), but we have\n    # 7 values in \"frq\", creating the data frame will fail. 
In this case,\n    # we have to make sure that recycling shorter vectors works.\n    if (length(variable_label) > 1 && !flag_range) {\n      variable_label <- variable_label[seq_along(frq)]\n    }\n\n    # shorten value labels\n    if (!is.null(value_label_width)) {\n      value_labels <- insight::format_string(\n        value_labels,\n        length = value_label_width\n      )\n    }\n\n    # add values, value labels and frequencies to data frame\n    d <- cbind(\n      d,\n      data.frame(\n        variable_label,\n        values,\n        value_labels,\n        frq,\n        proportions = frq_proportions,\n        stringsAsFactors = FALSE\n      )\n    )\n\n    # which columns need to be checked for duplicates?\n    duplicates <- c(\"ID\", \"Name\", \"Type\", \"Missings\", \"variable_label\")\n    if (isTRUE(flag_range)) {\n      # when we have numeric variables with value range as values, and when\n      # these variables had long variable labels that have been wrapped,\n      # the range value is duplicated (due to recycling), so we need to fix\n      # this here.\n      duplicates <- c(duplicates, c(\"values\", \"frq\", \"proportions\"))\n    }\n\n    # clear duplicates due to recycling\n    for (i in duplicates) {\n      d[[i]][duplicated(d[[i]])] <- \"\"\n    }\n\n    # remove empty rows\n    d <- remove_empty_rows(d)\n\n    # add empty row at the end, as separator\n    d[nrow(d) + 1, ] <- rep(\"\", ncol(d))\n\n    # add row ID\n    d$.row_id <- id\n    d\n  })\n\n  # clean-up (column order, rename, ...)\n  out <- .finalize_result(do.call(rbind, out))\n\n  # add attributes\n  .add_codebook_attributes(out, data_name, data, select)\n}\n\n\n# helper -----------------------\n\n#' @keywords internal\n.extract_variable_labels <- function(x, variable_label_width = NULL) {\n  varlab <- attr(x, \"label\", exact = TRUE)\n  if (!is.null(varlab) && length(varlab)) {\n    variable_label <- varlab\n    # if variable labels are too long, split into multiple elements\n   
 if (\n      !is.null(variable_label_width) &&\n        nchar(variable_label) > variable_label_width\n    ) {\n      variable_label <- insight::trim_ws(unlist(\n        strsplit(\n          text_wrap(variable_label, width = variable_label_width),\n          \"\\n\",\n          fixed = TRUE\n        ),\n        use.names = FALSE\n      ))\n    }\n  } else {\n    variable_label <- NA\n  }\n  variable_label\n}\n\n\n#' @keywords internal\n.finalize_result <- function(out) {\n  # rename\n  pattern <- c(\"variable_label\", \"values\", \"value_labels\", \"frq\", \"proportions\")\n  replacement <- c(\"Label\", \"Values\", \"Value Labels\", \"N\", \"Prop\")\n  for (i in seq_along(pattern)) {\n    names(out) <- replace(names(out), names(out) == pattern[i], replacement[i])\n  }\n\n  # remove all empty columns\n  out <- remove_empty_columns(out)\n\n  # reorder\n  column_order <- c(\n    \"ID\",\n    \"Name\",\n    \"Label\",\n    \"Type\",\n    \"Missings\",\n    \"Values\",\n    \"Value Labels\",\n    \"N\",\n    \"Prop\",\n    \".row_id\"\n  )\n  out[union(intersect(column_order, names(out)), names(out))]\n}\n\n\n#' @keywords internal\n.add_codebook_attributes <- function(out, data_name, data, select) {\n  attr(out, \"data_name\") <- data_name\n  attr(out, \"n_rows\") <- nrow(data)\n  attr(out, \"n_cols\") <- ncol(data)\n  attr(out, \"n_shown\") <- length(select)\n  class(out) <- c(\"data_codebook\", \"data.frame\")\n\n  out\n}\n\n\n# methods ----------------------\n\n#' @export\nformat.data_codebook <- function(x, format = \"text\", ...) 
{\n  # use [[\"N\"]] to avoid partial matching\n  if (any(stats::na.omit(nchar(x[[\"N\"]]) > 5))) {\n    x[[\"N\"]] <- insight::trim_ws(prettyNum(x[[\"N\"]], big.mark = \",\"))\n    x[[\"N\"]][x[[\"N\"]] == \"NA\" | is.na(x[[\"N\"]])] <- \"\"\n  }\n  # merge N and %\n  if (!is.null(x$Prop)) {\n    x$Prop[x$Prop == \"NA\" | is.na(x$Prop)] <- \"\"\n    # align only for text format\n    if (identical(format, \"text\")) {\n      x$Prop[x$Prop != \"\"] <- format(x$Prop[x$Prop != \"\"], justify = \"right\") # nolint\n    }\n    x[[\"N\"]][x$Prop != \"\"] <- sprintf(\n      # nolint\n      \"%s (%s)\",\n      as.character(x[[\"N\"]][x$Prop != \"\"]), # nolint\n      x$Prop[x$Prop != \"\"] # nolint\n    )\n    x$Prop <- NULL\n  }\n  x\n}\n\n\n#' @export\nprint.data_codebook <- function(x, ...) {\n  caption <- c(.get_codebook_caption(x), \"blue\")\n  x$.row_id <- NULL\n  cat(\n    insight::export_table(\n      format(x),\n      title = caption,\n      empty_line = \"-\",\n      cross = \"+\",\n      align = .get_codebook_align(x),\n      ...\n    )\n  )\n}\n\n\n#' @rdname data_codebook\n#' @export\nprint_html.data_codebook <- function(\n  x,\n  font_size = \"100%\",\n  line_padding = 3,\n  row_color = \"#eeeeee\",\n  ...\n) {\n  caption <- .get_codebook_caption(x)\n  attr(x, \"table_caption\") <- caption\n  # since we have each value at its own row, the HTML table contains\n  # horizontal borders for each cell/row. 
We want to remove those borders\n  # from rows that actually belong to one variable\n  separator_lines <- which(duplicated(x$.row_id) & x$N == \"\") # nolint\n  # remove separator lines, as we don't need these for HTML tables\n  x <- x[-separator_lines, ]\n  # check row IDs, and find odd rows\n  odd_rows <- (x$.row_id %% 2 == 1)\n  x$.row_id <- NULL\n  # create basic table\n  backend <- .check_format_backend(...)\n  out <- insight::export_table(\n    format(x, format = \"html\"),\n    title = caption,\n    format = backend,\n    align = .get_codebook_align(x)\n  )\n\n  # for tiny table output, we don't need to do any further formatting\n  if (identical(backend, \"tt\")) {\n    return(out)\n  }\n\n  insight::check_if_installed(\"gt\")\n  # no border for rows which are not separator lines\n  out <- gt::tab_style(\n    out,\n    style = list(gt::cell_borders(sides = \"top\", style = \"hidden\")),\n    locations = gt::cells_body(rows = which(x$ID == \"\")) # nolint\n  )\n  # highlight odd rows\n  if (!is.null(row_color)) {\n    out <- gt::tab_style(\n      out,\n      style = list(gt::cell_fill(color = row_color)),\n      locations = gt::cells_body(rows = odd_rows)\n    )\n  }\n  # set up additional HTML options\n  gt::tab_options(\n    out,\n    table.font.size = font_size,\n    data_row.padding = gt::px(line_padding)\n  )\n}\n\n\n#' @rdname data_codebook\n#' @export\ndisplay.data_codebook <- function(\n  object,\n  format = \"markdown\",\n  font_size = \"100%\",\n  line_padding = 3,\n  row_color = \"#eeeeee\",\n  ...\n) {\n  format <- .display_default_format(format)\n\n  fun_args <- list(\n    x = object,\n    font_size = font_size,\n    line_padding = line_padding,\n    row_color = row_color,\n    ...\n  )\n\n  # print table in HTML or markdown format\n  if (format %in% c(\"html\", \"tt\")) {\n    fun_args$backend <- format\n    do.call(print_html, fun_args)\n  } else {\n    do.call(print_md, fun_args)\n  }\n}\n\n\n#' @export\nprint_md.data_codebook <- function(x, 
...) {\n  caption <- .get_codebook_caption(x)\n  x$.row_id <- NULL\n  attr(x, \"table_caption\") <- caption\n  insight::export_table(\n    format(x, format = \"markdown\"),\n    title = caption,\n    align = .get_codebook_align(x),\n    format = \"markdown\"\n  )\n}\n\n\n# helper ---------\n\n.get_codebook_caption <- function(x) {\n  n_rows <- as.character(attributes(x)$n_rows)\n  if (nchar(n_rows) > 5) {\n    n_rows <- prettyNum(n_rows, big.mark = \",\")\n  }\n  sprintf(\n    \"%s (%s rows and %i variables, %i shown)\",\n    attributes(x)$data_name,\n    n_rows,\n    attributes(x)$n_cols,\n    attributes(x)$n_shown\n  )\n}\n\n.get_codebook_align <- function(x) {\n  # need to remove this one\n  x$Prop <- NULL\n  align <- c(\n    ID = \"l\",\n    Name = \"l\",\n    Label = \"l\",\n    Type = \"l\",\n    Missings = \"r\",\n    Values = \"r\",\n    `Value Labels` = \"l\",\n    N = \"r\"\n  )\n  align <- align[colnames(x)]\n  paste(unname(align), collapse = \"\")\n}\n"
  },
  {
    "path": "R/data_duplicated.R",
    "content": "#' @title Extract all duplicates\n#'\n#' @description Extract all duplicates, for visual inspection.\n#' Note that it also contains the first occurrence of future\n#' duplicates, unlike [duplicated()] or [dplyr::distinct()]). Also\n#' contains an additional column reporting the number of missing\n#' values for that row, to help in the decision-making when\n#' selecting which duplicates to keep.\n#'\n#' @inheritParams extract_column_names\n#'\n#' @keywords duplicates\n#' @export\n#' @seealso\n#' [data_unique()]\n#' @return A dataframe, containing all duplicates.\n#' @examples\n#' df1 <- data.frame(\n#'   id = c(1, 2, 3, 1, 3),\n#'   year = c(2022, 2022, 2022, 2022, 2000),\n#'   item1 = c(NA, 1, 1, 2, 3),\n#'   item2 = c(NA, 1, 1, 2, 3),\n#'   item3 = c(NA, 1, 1, 2, 3)\n#' )\n#'\n#' data_duplicated(df1, select = \"id\")\n#'\n#' data_duplicated(df1, select = c(\"id\", \"year\"))\n#'\n#' # Filter to exclude duplicates\n#' df2 <- df1[-c(1, 5), ]\n#' df2\n#'\ndata_duplicated <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  UseMethod(\"data_duplicated\")\n}\n\n#' @export\ndata_duplicated.data.frame <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  select <- .select_nse(\n    select,\n    data,\n    exclude = exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  data$temporary_id <- do.call(paste, c(data_select(data, select), sep = \"_\"))\n\n  data <- cbind(Row = seq_len(nrow(data)), data)\n  dups.index <- data$temporary_id %in%\n    data$temporary_id[duplicated(data$temporary_id)]\n  dups <- data[dups.index, ]\n  dups$count_na <- rowSums(is.na(dups))\n  dups <- data_arrange(dups, select)\n  dups <- data_remove(dups, \"temporary_id\")\n  dups\n}\n\n#' @export\ndata_duplicated.grouped_df <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = 
FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  select <- .select_nse(\n    select,\n    data,\n    exclude = exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  grps <- attr(data, \"groups\", exact = TRUE)\n  grps <- grps[[\".rows\"]]\n\n  data <- as.data.frame(data)\n\n  out <- lapply(grps, function(x) {\n    data_duplicated.data.frame(data[x, ], select = select)\n  })\n\n  out <- do.call(rbind, out)\n\n  out\n}\n"
  },
  {
    "path": "R/data_extract.R",
    "content": "#' Extract one or more columns or elements from an object\n#'\n#' `data_extract()` (or its alias `extract()`) is similar to `$`. It extracts\n#' either a single column or element from an object (e.g., a data frame, list),\n#' or multiple columns resp. elements.\n#'\n#' @param data The object to subset. Methods are currently available for data frames\n#'   and data frame extensions (e.g., tibbles).\n#' @param name An optional argument that specifies the column to be used as\n#'   names for the vector elements after extraction. Must be specified either\n#'   as literal variable name (e.g., `column_name`) or as string\n#'   (`\"column_name\"`). `name` will be ignored when a data frame is returned.\n#' @param extract String, indicating which element will be extracted when `select`\n#'   matches multiple variables. Can be `\"all\"` (the default) to return all\n#'   matched variables, `\"first\"` or `\"last\"` to return the first or last match,\n#'   or `\"odd\"` and `\"even\"` to return all odd-numbered or even-numbered\n#'   matches. Note that `\"first\"` or `\"last\"` return a vector (unless\n#'   `as_data_frame = TRUE`), while `\"all\"` can return a vector (if only one\n#'   match was found) *or* a data frame (for more than one match). Type safe\n#'   return values are only possible when `extract` is `\"first\"` or `\"last\"` (will\n#'   always return a vector) or when `as_data_frame = TRUE` (always returns a\n#'   data frame).\n#' @param as_data_frame Logical, if `TRUE`, will always return a data frame,\n#'   even if only one variable was matched. If `FALSE`, either returns a vector\n#'   or a data frame. See `extract` for details.\n#' @param verbose Toggle warnings.\n#' @param ... For use by future methods.\n#'\n#' @inheritParams extract_column_names\n#'\n#' @details `data_extract()` can be used to select multiple variables or pull a\n#' single variable from a data frame. 
Thus, the return value is by default not\n#' type safe - `data_extract()` either returns a vector or a data frame.\n#' \\subsection{Extracting single variables (vectors)}{\n#' When `select` is the name of a single column, or when select only matches\n#' one column, a vector is returned. A single variable is also returned when\n#' `extract` is either `\"first` or `\"last\"`. Setting `as_data_frame` to `TRUE`\n#' overrides this behaviour and *always* returns a data frame.\n#' }\n#' \\subsection{Extracting a data frame of variables}{\n#' When `select` is a character vector containing more than one column name (or\n#' a numeric vector with more than one valid column indices), or when `select`\n#' uses one of the supported select-helpers that match multiple columns, a\n#' data frame is returned. Setting `as_data_frame` to `TRUE` *always* returns\n#' a data frame.\n#' }\n#'\n#' @return A vector (or a data frame) containing the extracted element, or\n#'   `NULL` if no matching variable was found.\n#' @export\n#'\n#' @examples\n#' # single variable\n#' data_extract(mtcars, cyl, name = gear)\n#' data_extract(mtcars, \"cyl\", name = gear)\n#' data_extract(mtcars, -1, name = gear)\n#' data_extract(mtcars, cyl, name = 0)\n#' data_extract(mtcars, cyl, name = \"row.names\")\n#'\n#' # selecting multiple variables\n#' head(data_extract(iris, starts_with(\"Sepal\")))\n#' head(data_extract(iris, ends_with(\"Width\")))\n#' head(data_extract(iris, 2:4))\n#'\n#' # select first of multiple variables\n#' data_extract(iris, starts_with(\"Sepal\"), extract = \"first\")\n#'\n#' # select first of multiple variables, return as data frame\n#' head(data_extract(iris, starts_with(\"Sepal\"), extract = \"first\", as_data_frame = TRUE))\ndata_extract <- function(data, select, ...) 
{\n  UseMethod(\"data_extract\")\n}\n\n#' @rdname data_extract\n#' @export\ndata_extract.data.frame <- function(\n  data,\n  select,\n  name = NULL,\n  extract = \"all\",\n  as_data_frame = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  extract <- match.arg(\n    tolower(extract),\n    choices = c(\"all\", \"first\", \"last\", \"odd\", \"even\")\n  )\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude = NULL,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # nothing to select?\n  if (!length(select)) {\n    return(NULL)\n  }\n\n  nl <- as.list(seq_along(data))\n  names(nl) <- names(data)\n  name <- eval(substitute(name), nl, parent.frame())\n\n  if (is.numeric(name) && length(name) == 1L) {\n    if (name < 0L) {\n      name <- ncol(data) + name + 1L\n    } else if (name == 0L) {\n      name <- rownames(data)\n    }\n  } else if (is.character(name) && identical(name, \"row.names\")) {\n    name <- rownames(data)\n  }\n\n  # chose which matched variables to extract\n  select <- switch(\n    extract,\n    first = select[1L],\n    last = select[length(select)],\n    odd = select[seq(1L, length(select), 2L)],\n    even = select[seq(2L, length(select), 2L)],\n    select\n  )\n\n  # \"name\" only used for naming elements in a vector, not data frame\n  needs_no_names <- isTRUE(as_data_frame) ||\n    # more than one variable means data frame, so no name\n    length(select) > 1L ||\n    # if we have only one variable, but number of observations not equal to\n    # length of names, we have no proper match, so no naming, too.\n    (length(select) == 1L &&\n      length(name) > 1L &&\n      length(data[[select]]) != length(name))\n\n  if (needs_no_names) {\n    name <- NULL\n  }\n\n  # we definitely should have a vector here when name not NULL\n  if (is.null(name)) {\n    data[, select, drop = !as_data_frame]\n  } else {\n    # if name indicates a variable, extract values for naming 
now\n    if (length(name) == 1L) {\n      name <- data[[name]]\n    }\n    stats::setNames(data[[select]], name)\n  }\n}\n"
  },
  {
    "path": "R/data_group.R",
    "content": "#' @title Create a grouped data frame\n#' @name data_group\n#'\n#' @description This function is comparable to `dplyr::group_by()`, but just\n#' following the **datawizard** function design. `data_ungroup()` removes the\n#' grouping information from a grouped data frame.\n#'\n#' @param data A data frame\n#' @inheritParams extract_column_names\n#'\n#' @return A grouped data frame, i.e. a data frame with additional information\n#' about the grouping structure saved as attributes.\n#'\n#' @examplesIf requireNamespace(\"poorman\")\n#' data(efc)\n#' suppressPackageStartupMessages(library(poorman, quietly = TRUE))\n#'\n#' # total mean\n#' efc %>%\n#'   summarize(mean_hours = mean(c12hour, na.rm = TRUE))\n#'\n#' # mean by educational level\n#' efc %>%\n#'   data_group(c172code) %>%\n#'   summarize(mean_hours = mean(c12hour, na.rm = TRUE))\n#' @export\ndata_group <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # variables for grouping\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n  # create grid with combinations of all levels\n  my_grid <- as.data.frame(expand.grid(lapply(data[select], unique)))\n  # sort grid\n  my_grid <- my_grid[do.call(order, my_grid), , drop = FALSE]\n\n  .rows <- lapply(seq_len(nrow(my_grid)), function(i) {\n    as.integer(data_match(\n      data,\n      to = my_grid[i, , drop = FALSE],\n      match = \"and\",\n      return_indices = TRUE,\n      remove_na = FALSE\n    ))\n  })\n  my_grid[[\".rows\"]] <- .rows\n\n  # remove data_match attributes\n  attr(my_grid, \"out.attrs\") <- NULL\n  attr(my_grid, \".drop\") <- TRUE\n\n  attr(data, \"groups\") <- my_grid\n  class(data) <- unique(c(\"grouped_df\", \"data.frame\"), class(data))\n\n  data\n}\n\n\n#' @rdname data_group\n#' @export\ndata_ungroup <- function(data, verbose = TRUE, ...) 
{\n  attr(data, \"groups\") <- NULL\n  class(data) <- unique(setdiff(class(data), \"grouped_df\"))\n\n  data\n}\n"
  },
  {
    "path": "R/data_match.R",
#' Return filtered or sliced data frame, or row indices
#'
#' Return a filtered (or sliced) data frame or row indices of a data frame that
#' match a specific condition. `data_filter()` works like `data_match()`, but works
#' with logical expressions or row indices of a data frame to specify matching
#' conditions.
#'
#' @param x A data frame.
#' @param to A data frame matching the specified conditions. Note that if
#'   `match` is a value other than `"and"`, the original row order might be
#'   changed. See 'Details'.
#' @param match String, indicating with which logical operation matching
#'   conditions should be combined. Can be `"and"` (or `"&"`), `"or"` (or `"|"`)
#'   or `"not"` (or `"!"`).
#' @param return_indices Logical, if `TRUE`, return the vector of rows that
#'   can be used to filter the original data frame. If `FALSE` (default),
#'   returns directly the filtered data frame instead of the row indices.
#' @param remove_na Logical, if `TRUE`, missing values (`NA`s) are removed before
#'   filtering the data. This is the default behaviour, however, sometimes when
#'   row indices are requested (i.e. `return_indices=TRUE`), it might be useful
#'   to preserve `NA` values, so returned row indices match the row indices of
#'   the original data frame.
#' @param ... A sequence of logical expressions indicating which rows to keep,
#'   or a numeric vector indicating the row indices of rows to keep. Can also be
#'   a string representation of a logical expression (e.g. `"x > 4"`), a
#'   character vector (e.g. `c("x > 4", "y == 2")`) or a variable that contains
#'   the string representation of a logical expression. These might be useful
#'   when used in packages to avoid defining undefined global variables.
#'
#' @return A filtered data frame, or the row indices that match the specified
#' configuration.
#'
#' @details For `data_match()`, if `match` is either `"or"` or `"not"`, the
#' original row order from `x` might be changed. If preserving row order is
#' required, use `data_filter()` instead.
#'
#' ```
#' # mimics subset() behaviour, preserving original row order
#' head(data_filter(mtcars[c("mpg", "vs", "am")], vs == 0 | am == 1))
#' #>                    mpg vs am
#' #> Mazda RX4         21.0  0  1
#' #> Mazda RX4 Wag     21.0  0  1
#' #> Datsun 710        22.8  1  1
#' #> Hornet Sportabout 18.7  0  0
#' #> Duster 360        14.3  0  0
#' #> Merc 450SE        16.4  0  0
#'
#' # re-sorting rows
#' head(data_match(mtcars[c("mpg", "vs", "am")],
#'                 data.frame(vs = 0, am = 1),
#'                 match = "or"))
#' #>                    mpg vs am
#' #> Mazda RX4         21.0  0  1
#' #> Mazda RX4 Wag     21.0  0  1
#' #> Hornet Sportabout 18.7  0  0
#' #> Duster 360        14.3  0  0
#' #> Merc 450SE        16.4  0  0
#' #> Merc 450SL        17.3  0  0
#' ```
#'
#' While `data_match()` works with data frames to match conditions against,
#' `data_filter()` is basically a wrapper around `subset(subset = <filter>)`.
#' However, unlike `subset()`, it preserves label attributes and is useful when
#' working with labelled data.
#'
#' @examples
#' data_match(mtcars, data.frame(vs = 0, am = 1))
#' data_match(mtcars, data.frame(vs = 0, am = c(0, 1)))
#'
#' # observations where "vs" is NOT 0 AND "am" is NOT 1
#' data_match(mtcars, data.frame(vs = 0, am = 1), match = "not")
#' # equivalent to
#' data_filter(mtcars, vs != 0 & am != 1)
#'
#' # observations where EITHER "vs" is 0 OR "am" is 1
#' data_match(mtcars, data.frame(vs = 0, am = 1), match = "or")
#' # equivalent to
#' data_filter(mtcars, vs == 0 | am == 1)
#'
#' # slice data frame by row indices
#' data_filter(mtcars, 5:10)
#'
#' # Define a custom function containing data_filter()
#' my_filter <- function(data, variable) {
#'   data_filter(data, variable)
#' }
#' my_filter(mtcars, "cyl == 6")
#'
#' # Pass complete filter-condition as string.
#' my_filter <- function(data, condition) {
#'   data_filter(data, condition)
#' }
#' my_filter(mtcars, "am != 0")
#'
#' # string can also be used directly as argument
#' data_filter(mtcars, "am != 0")
#'
#' # or as variable
#' fl <- "am != 0"
#' data_filter(mtcars, fl)
#' @inherit data_rename seealso
#' @export
data_match <- function(
  x,
  to,
  match = "and",
  return_indices = FALSE,
  remove_na = TRUE,
  ...
) {
  if (!is.data.frame(to)) {
    to <- as.data.frame(to)
  }
  # keep an untouched copy: matching below works on a column-reduced
  # (and possibly NA-reduced) copy, but rows are returned from the original
  original_x <- x

  # evaluate
  match <- match.arg(
    tolower(match),
    c("and", "&", "&&", "or", "|", "||", "!", "not")
  )
  # normalize the operator aliases to "and" / "not" / "or"
  match <- switch(
    match,
    `&` = ,
    `&&` = ,
    and = "and",
    `!` = ,
    not = "not",
    "or"
  )

  # validation check
  shared_columns <- intersect(colnames(x), colnames(to))
  if (is.null(shared_columns) || length(shared_columns) == 0L) {
    insight::format_error(
      "None of the columns from the data frame with matching conditions were found in `x`."
    )
  }

  # only select common columns
  x <- x[shared_columns]

  # prepare
  # "or" starts from an empty index set and unions matches per condition;
  # "and"/"not" start from all rows and narrow the set per condition
  if (identical(match, "or")) {
    idx <- vector("numeric", length = 0L)
  } else {
    # remove missings before matching
    if (isTRUE(remove_na)) {
      x <- x[stats::complete.cases(x), , drop = FALSE]
    }
    idx <- seq_len(nrow(x))
  }

  # Find matching rows
  for (col in names(to)) {
    values <- x[[col]]
    if (match == "or") {
      idx <- union(idx, which(values %in% to[[col]]))
    } else if (match == "not") {
      idx <- idx[!values[idx] %in% to[[col]]]
    } else {
      idx <- idx[values[idx] %in% to[[col]]]
    }
  }

  # prepare output
  if (isFALSE(return_indices)) {
    out <- original_x[idx, , drop = FALSE]
    # restore value and variable labels
    for (i in colnames(out)) {
      attr(out[[i]], "label") <- attr(original_x[[i]], "label", exact = TRUE)
      attr(out[[i]], "labels") <- attr(original_x[[i]], "labels", exact = TRUE)
    }
  } else {
    out <- idx
  }

  # add back custom attributes
  out <- .replace_attrs(out, attributes(original_x))
  out
}


#' @rdname data_match
#' @export
data_filter <- function(x, ...) {
  UseMethod("data_filter")
}

#' @export
data_filter.data.frame <- function(x, ...) {
  out <- x

  # convert tibble to data.frame
  if (inherits(x, "tbl_df")) {
    out <- as.data.frame(out, stringsAsFactors = FALSE)
    tbl_input <- TRUE
  } else {
    tbl_input <- FALSE
  }

  # capture the *unevaluated* filter expressions passed via `...`
  dots <- match.call(expand.dots = FALSE)[["..."]]

  # a named dot means the user probably wrote `var = value` instead of
  # `var == value` - stop with a helpful message
  if (any(nzchar(names(dots), keepNA = TRUE))) {
    insight::format_error(
      "Filtering did not work. Please check if you need `==` (instead of `=`) for comparison."
    )
  }

  # turn character vector (like `c("mpg <= 20", "cyl == 6")`) into symbols
  if (length(dots) == 1) {
    character_vector <- .dynEval(dots[[1]], ifnotfound = NULL)
    if (is.character(character_vector) && length(character_vector) > 1) {
      dots <- lapply(character_vector, str2lang)
    }
  }

  # Check syntax of the filter. Must be done *before* calling subset()
  # (cf easystats/datawizard#237)
  for (.fcondition in dots) {
    .check_filter_syntax(insight::safe_deparse(.fcondition))
  }

  # apply each condition in turn; conditions are combined implicitly
  # with "and", since each one filters the result of the previous one
  for (i in seq_along(dots)) {
    # only proceed when result is still valid
    if (!is.null(out)) {
      symbol <- dots[[i]]
      # evaluate, we may have a variable with filter expression
      eval_symbol <- .dynEval(symbol, ifnotfound = NULL)
      # validation check: is variable named like a function?
      if (is.function(eval_symbol)) {
        eval_symbol <- .dynGet(symbol, ifnotfound = NULL)
      }
      eval_symbol_numeric <- NULL
      if (!is.null(eval_symbol)) {
        # when possible to evaluate, do we have a numeric vector provided
        # as string? (e.g. `"5:10"`) - then try to coerce to numeric
        eval_symbol_numeric <- tryCatch(
          eval(parse(text = eval_symbol)),
          error = function(e) NULL
        )
      }

      # here we go when we have a filter expression, and no numeric vector to slice
      if (
        is.null(eval_symbol) ||
          (!is.numeric(eval_symbol) && !is.numeric(eval_symbol_numeric))
      ) {
        # could be evaluated? Then filter expression is a string and we need
        # to convert into symbol
        if (is.character(eval_symbol)) {
          symbol <- str2lang(eval_symbol)
        }
        # filter data; evaluate in a fresh environment so the expression
        # cannot accidentally pick up unrelated local variables, and capture
        # errors/warnings as condition objects for inspection below
        out <- tryCatch(
          subset(out, subset = eval(symbol, envir = new.env())),
          warning = function(e) e,
          error = function(e) e
        )
      } else if (is.numeric(eval_symbol)) {
        # if symbol could be evaluated and is numeric, slice
        out <- tryCatch(out[eval_symbol, , drop = FALSE], error = function(e) {
          NULL
        })
      } else if (is.numeric(eval_symbol_numeric)) {
        # if symbol could be evaluated, was string and could be converted to numeric, slice
        out <- tryCatch(
          out[eval_symbol_numeric, , drop = FALSE],
          error = function(e) NULL
        )
      }

      # subset() failed - inspect the error message to report a possibly
      # misspelled variable name to the user
      if (inherits(out, c("simpleError", "objectNotFoundError"))) {
        error_msg <- out$message[1]
        # try to find out which variable was the cause for the error
        if (grepl("object '(.*)' not found", error_msg)) {
          error_var <- gsub("object '(.*)' not found", "\\1", error_msg)
          # some syntax errors do not relate to misspelled variables...
          if (!error_var %in% colnames(x)) {
            insight::format_error(
              paste0(
                "Variable \"",
                error_var,
                "\" was not found in the dataset."
              ),
              .misspelled_string(colnames(x), error_var, "Possibly misspelled?")
            )
          }
        }
        out <- NULL
      }
    }
  }

  if (is.null(out)) {
    insight::format_error(
      "Filtering did not work. Please check the syntax of your conditions."
    )
  }

  # restore value and variable labels
  for (i in colnames(out)) {
    attr(out[[i]], "label") <- attr(x[[i]], "label", exact = TRUE)
    attr(out[[i]], "labels") <- attr(x[[i]], "labels", exact = TRUE)
  }

  # add back custom attributes
  out <- .replace_attrs(out, attributes(x))

  # add back tidyverse attributes
  if (isTRUE(tbl_input)) {
    class(out) <- c("tbl_df", "tbl", "data.frame")
  }

  out
}


#' @export
data_filter.grouped_df <- function(x, ...) {
  original_x <- x
  grps <- attr(x, "groups", exact = TRUE)
  grps <- grps[[".rows"]]

  # Remove tidyverse attributes, will add them back at the end
  if (inherits(x, "tbl_df")) {
    tbl_input <- TRUE
    x <- as.data.frame(x, stringsAsFactors = FALSE)
  } else {
    tbl_input <- FALSE
  }

  # forward the *unevaluated* dots to the data.frame method, applied to
  # each group's rows separately, then row-bind the per-group results
  dots <- match.call(expand.dots = FALSE)[["..."]]
  out <- lapply(grps, function(grp) {
    arguments <- list(x[grp, ])
    arguments <- c(arguments, dots)
    do.call("data_filter.data.frame", arguments)
  })

  out <- do.call(rbind, out)

  if (!insight::object_has_rownames(x)) {
    rownames(out) <- NULL
  }

  # add back tidyverse attributes
  if (isTRUE(tbl_input)) {
    class(out) <- c("tbl_df", "tbl", "data.frame")
  }

  # add back custom attributes
  out <- .replace_attrs(out, attributes(original_x))

  out
}


# helper -------------------

.check_filter_syntax <- function(.fcondition) {
  # NOTE: We cannot check for `=` when "filter" is not a character vector
  # because the function will then fail in general. I.e.,
  # "data_filter(mtcars, filter = mpg > 10 & cyl = 4)" will not start
  # running this function and never reaches the first code line,
  # but immediately stops...

  # strip all legitimate two-character comparison operators first, so any
  # remaining "=" must be a (probably wrong) single assignment/comparison
  tmp <- gsub("==", "", .fcondition, fixed = TRUE)
  tmp <- gsub("<=", "", tmp, fixed = TRUE)
  tmp <- gsub(">=", "", tmp, fixed = TRUE)
  tmp <- gsub("!=", "", tmp, fixed = TRUE)

  # We want to check whether user used a "=" in the filter syntax. This
  # typically indicates that the comparison "==" is probably wrong by using
  # a "=" instead of `"=="`. However, if a function was provided, we indeed
  # may have "=", e.g. if the pattern was
  # `data_filter(out, grep("pattern", x = value))`. We thus first check if we
  # can identify a function call, and only continue checking for wrong syntax
  # when we have not identified a function.

  if (
    !is.function(tryCatch(
      get(gsub("^(.*?)\\((.*)", "\\1", tmp)),
      error = function(e) NULL
    ))
  ) {
    # Give more informative message to users
    # about possible misspelled comparisons / logical conditions
    # check if "=" instead of "==" was used?
    if (any(grepl("=", tmp, fixed = TRUE))) {
      insight::format_error(
        "Filtering did not work. Please check if you need `==` (instead of `=`) for comparison."
      )
    }
    # check if "&&" etc instead of "&" was used?
    logical_operator <- NULL
    if (any(grepl("&&", .fcondition, fixed = TRUE))) {
      logical_operator <- "&&"
    }
    if (any(grepl("||", .fcondition, fixed = TRUE))) {
      logical_operator <- "||"
    }
    if (!is.null(logical_operator)) {
      insight::format_error(
        paste0(
          "Filtering did not work. Please check if you need `",
          # NOTE(review): substr(..., 0, 1) works because R treats a start
          # of 0 like 1, but `substr(logical_operator, 1, 1)` would be clearer
          substr(logical_operator, 0, 1),
          "` (instead of `",
          logical_operator,
          "`) as logical operator."
        )
      )
    }
  }
}
  },
  {
    "path": "R/data_merge.R",
    "content": "#' @title Merge (join) two data frames, or a list of data frames\n#' @name data_merge\n#'\n#' @description\n#' Merge (join) two data frames, or a list of data frames. However, unlike\n#' base R's `merge()`, `data_merge()` offers a few more methods to join data\n#' frames, and it does not drop data frame nor column attributes.\n#'\n#' @param x,y A data frame to merge. `x` may also be a list of data frames\n#'   that will be merged. Note that the list-method has no `y` argument.\n#' @param join Character vector, indicating the method of joining the data frames.\n#'   Can be `\"full\"`, `\"left\"` (default), `\"right\"`, `\"inner\"`, `\"anti\"`, `\"semi\"`\n#'   or `\"bind\"`. See details below.\n#' @param by Specifications of the columns used for merging.\n#' @param id Optional name for ID column that will be created to indicate the\n#'   source data frames for appended rows. Only applies if `join = \"bind\"`.\n#' @param verbose Toggle warnings.\n#' @param ... Not used.\n#'\n#' @return\n#' A merged data frame.\n#'\n#' @section Merging data frames:\n#'\n#' Merging data frames is performed by adding rows (cases), columns\n#' (variables) or both from the source data frame (`y`) to the target\n#' data frame (`x`). This usually requires one or more variables which\n#' are included in both data frames and that are used for merging, typically\n#' indicated with the `by` argument. When `by` contains a variable present\n#' in both data frames, cases are matched and filtered by identical values\n#' of `by` in `x` and `y`.\n#'\n#' @section Left- and right-joins:\n#'\n#' Left- and right joins usually don't add new rows (cases), but only new\n#' columns (variables) for existing cases in `x`. For `join = \"left\"` or\n#' `join = \"right\"` to work, `by` *must* indicate one or more columns that\n#' are included in both data frames. 
For `join = \"left\"`, if `by` is an\n#' identifier variable, which is included in both `x` and `y`, all variables\n#' from `y` are copied to `x`, but only those cases from `y` that have\n#' matching values in their identifier variable in `x` (i.e. all cases\n#' in `x` that are also found in `y` get the related values from the new\n#' columns in `y`). If there is no match between identifiers in `x` and `y`,\n#' the copied variable from `y` will get a `NA` value for this particular\n#' case. Other variables that occur both in `x` and `y`, but are not used\n#' as identifiers (with `by`), will be renamed to avoid multiple identical\n#' variable names. Cases in `y` where values from the identifier have no\n#' match in `x`'s identifier are removed. `join = \"right\"` works in\n#' a similar way as `join = \"left\"`, just that only cases from `x` that\n#' have matching values in their identifier variable in `y` are chosen.\n#'\n#' In base R, these are equivalent to `merge(x, y, all.x = TRUE)` and\n#' `merge(x, y, all.y = TRUE)`.\n#'\n#' @section Full joins:\n#'\n#' Full joins copy all cases from `y` to `x`. For matching cases in both\n#' data frames, values for new variables are copied from `y` to `x`. For\n#' cases in `y` not present in `x`, these will be added as new rows to `x`.\n#' Thus, full joins not only add new columns (variables), but also might\n#' add new rows (cases).\n#'\n#' In base R, this is equivalent to `merge(x, y, all = TRUE)`.\n#'\n#' @section Inner joins:\n#'\n#' Inner joins merge two data frames, however, only those rows (cases) are\n#' kept that are present in both data frames. 
Thus, inner joins usually\n#' add new columns (variables), but also remove rows (cases) that only\n#' occur in one data frame.\n#'\n#' In base R, this is equivalent to `merge(x, y)`.\n#'\n#' @section Binds:\n#'\n#' `join = \"bind\"` row-binds the complete second data frame `y` to `x`.\n#' Unlike simple `rbind()`, which requires the same columns for both data\n#' frames, `join = \"bind\"` will bind shared columns from `y` to `x`, and\n#' add new columns from `y` to `x`.\n#'\n#' @examples\n#'\n#' x <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 5:7, id = 1:3)\n#' y <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 2:4)\n#'\n#' x\n#' y\n#'\n#' # \"by\" will default to all shared columns, i.e. \"c\" and \"id\". new columns\n#' # \"d\" and \"e\" will be copied from \"y\" to \"x\", but there are only two cases\n#' # in \"x\" that have the same values for \"c\" and \"id\" in \"y\". only those cases\n#' # have values in the copied columns, the other case gets \"NA\".\n#' data_merge(x, y, join = \"left\")\n#'\n#' # we change the id-value here\n#' x <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 5:7, id = 1:3)\n#' y <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 3:5)\n#'\n#' x\n#' y\n#'\n#' # no cases in \"y\" have the same matching \"c\" and \"id\" as in \"x\", thus\n#' # copied variables from \"y\" to \"x\" copy no values, all get NA.\n#' data_merge(x, y, join = \"left\")\n#'\n#' # one case in \"y\" has a match in \"id\" with \"x\", thus values for this\n#' # case from the remaining variables in \"y\" are copied to \"x\", all other\n#' # values (cases) in those remaining variables get NA\n#' data_merge(x, y, join = \"left\", by = \"id\")\n#'\n#' data(mtcars)\n#' x <- mtcars[1:5, 1:3]\n#' y <- mtcars[28:32, 4:6]\n#'\n#' # add ID common column\n#' x$id <- 1:5\n#' y$id <- 3:7\n#'\n#' # left-join, add new variables and copy values from y to x,\n#' # where \"id\" values match\n#' data_merge(x, y)\n#'\n#' # right-join, add 
new variables and copy values from x to y,\n#' # where \"id\" values match\n#' data_merge(x, y, join = \"right\")\n#'\n#' # full-join\n#' data_merge(x, y, join = \"full\")\n#'\n#'\n#' data(mtcars)\n#' x <- mtcars[1:5, 1:3]\n#' y <- mtcars[28:32, c(1, 4:5)]\n#'\n#' # add ID common column\n#' x$id <- 1:5\n#' y$id <- 3:7\n#'\n#' # left-join, no matching rows (because columns \"id\" and \"disp\" are used)\n#' # new variables get all NA values\n#' data_merge(x, y)\n#'\n#' # one common value in \"mpg\", so one row from y is copied to x\n#' data_merge(x, y, by = \"mpg\")\n#'\n#' # only keep rows with matching values in by-column\n#' data_merge(x, y, join = \"semi\", by = \"mpg\")\n#'\n#' # only keep rows with non-matching values in by-column\n#' data_merge(x, y, join = \"anti\", by = \"mpg\")\n#'\n#' # merge list of data frames. can be of different rows\n#' x <- mtcars[1:5, 1:3]\n#' y <- mtcars[28:31, 3:5]\n#' z <- mtcars[11:18, c(1, 3:4, 6:8)]\n#' x$id <- 1:5\n#' y$id <- 4:7\n#' z$id <- 3:10\n#' data_merge(list(x, y, z), join = \"bind\", by = \"id\", id = \"source\")\n#' @inherit data_rename seealso\n#' @export\ndata_merge <- function(x, ...) 
{\n  UseMethod(\"data_merge\")\n}\n\n#' @rdname data_merge\n#' @export\ndata_join <- data_merge\n\n#' @rdname data_merge\n#' @export\ndata_merge.data.frame <- function(\n  x,\n  y,\n  join = \"left\",\n  by = NULL,\n  id = NULL,\n  verbose = TRUE,\n  ...\n) {\n  class_x <- class(x)\n\n  # save variable attributes\n  attr_x_vars <- lapply(x, attributes)\n  attr_y_vars <- lapply(y, attributes)\n  attr_vars <- c(\n    attr_x_vars,\n    attr_y_vars[names(attr_y_vars)[!names(attr_y_vars) %in% names(attr_x_vars)]]\n  )\n\n  # check join-argument ----------------------\n\n  join <- match.arg(\n    join,\n    choices = c(\"full\", \"left\", \"right\", \"inner\", \"semi\", \"anti\", \"bind\")\n  )\n\n  # check id-argument ----------------------\n\n  all_columns <- union(colnames(x), colnames(y))\n\n  if (join == \"bind\" && !is.null(id) && id %in% all_columns) {\n    # ensure unique ID\n    id <- make.unique(c(all_columns, id), sep = \"_\")[length(all_columns) + 1]\n    # and also tell user...\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        sprintf(\n          \"Value of `id` already exists as column name. ID column was renamed to `%s`.\",\n          id\n        )\n      )\n    }\n  }\n\n  if (!is.null(id) && join == \"bind\") {\n    x[[id]] <- 1\n    y[[id]] <- 2\n  }\n\n  # check merge columns (\"by\"-argument) ----------------------\n\n  if (join != \"bind\") {\n    # we need a value for \"by\". 
If not provided, use all shared column names\n    if (is.null(by)) {\n      by <- intersect(colnames(x), colnames(y))\n    }\n\n    # If not all column names specified in \"by\" are present, yield warning\n    # and use all shared column names\n    if (!all(by %in% colnames(x)) || !all(by %in% colnames(y))) {\n      missing_in_x <- setdiff(by, colnames(x))\n      missing_in_y <- setdiff(by, colnames(y))\n      stop_message <- c(\n        \"Not all columns specified in `by` were found in the data frames.\",\n        if (length(missing_in_x) > 0L) {\n          paste0(\n            \"Following columns are in `by` but absent in `x`: \",\n            text_concatenate(missing_in_x)\n          )\n        },\n        if (length(missing_in_y) > 0L) {\n          paste0(\n            \"Following columns are in `by` but absent in `y`: \",\n            text_concatenate(missing_in_y)\n          )\n        }\n      )\n      if (isTRUE(verbose)) {\n        insight::format_error(stop_message)\n      }\n    }\n\n    # if still both data frames have no common columns, do a full join\n    if (!length(by)) {\n      if (isTRUE(verbose)) {\n        insight::format_warning(\n          \"Found no matching columns in the data frames. 
Fully merging both data frames now.\",\n          \"Note that this can lead to unintended results, because rows in `x` and `y` are possibly duplicated.\",\n          \"You probably want to use `data_merge(x, y, join = \\\"bind\\\")` instead.\"\n        )\n      }\n      by <- NULL\n      join <- \"full\"\n    }\n  }\n\n  # check valid combination of \"join\" and \"by\" -----------------------\n\n  if (join %in% c(\"anti\", \"semi\") && (is.null(by) || length(by) != 1)) {\n    insight::format_error(\n      sprintf(\n        \"For `join = \\\"%s\\\"`, `by` needs to be a name of only one variable that is present in both data frames.\",\n        join\n      )\n    )\n  }\n\n  # merge --------------------\n\n  # for later sorting\n  if (join != \"bind\") {\n    if (nrow(x) > 0L) {\n      x$.data_merge_id_x <- seq_len(nrow(x))\n    }\n    if (nrow(y) > 0L) {\n      y$.data_merge_id_y <- (seq_len(nrow(y))) + nrow(x)\n    }\n  }\n  all_columns <- union(colnames(x), colnames(y))\n\n  out <- switch(\n    join,\n    full = merge(x, y, all = TRUE, sort = FALSE, by = by),\n    left = merge(x, y, all.x = TRUE, sort = FALSE, by = by),\n    right = merge(x, y, all.y = TRUE, sort = FALSE, by = by),\n    inner = merge(x, y, sort = FALSE, by = by),\n    semi = x[x[[by]] %in% y[[by]], , drop = FALSE],\n    anti = x[!x[[by]] %in% y[[by]], , drop = FALSE],\n    bind = .bind_data_frames(x, y)\n  )\n\n  # sort rows, add attributes, and return results -------------------------\n\n  if (\".data_merge_id_x\" %in% colnames(out)) {\n    # for full joins, we have no complete sorting id, but NAs for each\n    # data frame. 
we now \"merge\" the two sorting IDs from each data frame.\n    if (anyNA(out$.data_merge_id_x) && \".data_merge_id_y\" %in% colnames(out)) {\n      out$.data_merge_id_x[is.na(\n        out$.data_merge_id_x\n      )] <- out$.data_merge_id_y[is.na(out$.data_merge_id_x)]\n    }\n    out <- out[order(out$.data_merge_id_x), ]\n    out$.data_merge_id_x <- NULL\n    out$.data_merge_id_y <- NULL\n  }\n\n  # try to restore original column order as good as possible. Therefore, we\n  # first take all column names of the original input data frames, then\n  # we add all new columns, like duplicated from merging (name.x and name.y,\n  # if \"name\" was in both data frames, but not used in \"by\"), and then do a\n  # final check that all column names are present in \"out\" (e.g., \"name\" would)\n  # no longer be there if we have \"name.x\" and \"name.y\").\n\n  all_columns <- c(all_columns, setdiff(colnames(out), all_columns))\n  all_columns <- all_columns[all_columns %in% colnames(out)]\n  out <- out[all_columns]\n\n  # add back attributes\n  out <- .replace_attrs(out, attributes(y))\n  out <- .replace_attrs(out, attributes(x))\n\n  for (i in colnames(out)) {\n    if (is.list(attr_vars[[i]])) {\n      if (is.list(attributes(out[[i]]))) {\n        attributes(out[[i]]) <- utils::modifyList(\n          attr_vars[[i]],\n          attributes(out[[i]])\n        )\n      } else {\n        attributes(out[[i]]) <- attr_vars[[i]]\n      }\n    }\n  }\n\n  class(out) <- unique(c(class_x, \"data.frame\"))\n  out\n}\n\n\n#' @rdname data_merge\n#' @export\ndata_merge.list <- function(\n  x,\n  join = \"left\",\n  by = NULL,\n  id = NULL,\n  verbose = TRUE,\n  ...\n) {\n  out <- x[[1]]\n  df_id <- rep(1, times = nrow(out))\n\n  for (i in 2:length(x)) {\n    out <- data_merge(\n      out,\n      x[[i]],\n      join = join,\n      by = by,\n      id = NULL,\n      verbose = verbose,\n      ...\n    )\n    df_id <- c(df_id, rep(i, times = nrow(x[[i]])))\n  }\n\n  # we need separate handling for 
list of data frames and id-variable here\n  if (!is.null(id) && join == \"bind\") {\n    if (id %in% colnames(out)) {\n      # ensure unique ID\n      id <- make.unique(c(colnames(out), id), sep = \"_\")[\n        length(colnames(out)) + 1\n      ]\n      # and also tell user...\n      if (isTRUE(verbose)) {\n        insight::format_warning(\n          sprintf(\n            \"Value of `id` already exists as column name. ID column was renamed to `%s`.\",\n            id\n          )\n        )\n      }\n    }\n    out[[id]] <- df_id\n  }\n\n  out\n}\n\n\n.bind_data_frames <- function(x, y) {\n  # merge and sort. \"rbind()\" is faster than \"merge()\" if all columns present\n  if (all(colnames(x) %in% colnames(y)) && ncol(x) == ncol(y)) {\n    # we may have different column order\n    out <- rbind(x, y[match(colnames(x), colnames(y))])\n  } else {\n    # add ID for merging\n    if (nrow(x) > 0L) {\n      x$.data_merge_row <- seq_len(nrow(x))\n    }\n    if (nrow(y) > 0L) {\n      y$.data_merge_row <- (nrow(x) + 1):(nrow(x) + nrow(y))\n    }\n    merge_by <- intersect(colnames(x), colnames(y))\n    out <- merge(x, y, all = TRUE, sort = FALSE, by = merge_by)\n  }\n\n  # for empty df's, merge() may return an empty character vector\n  # make sure it's a data frame object.\n  if (!is.data.frame(out)) {\n    out <- as.data.frame(out)\n  }\n\n  if (\".data_merge_row\" %in% colnames(out)) {\n    out <- out[order(out$.data_merge_row), ]\n  }\n\n  out$.data_merge_row <- NULL\n  out\n}\n"
  },
  {
    "path": "R/data_modify.R",
    "content": "#' Create new variables in a data frame\n#'\n#' Create new variables or modify existing variables in a data frame. Unlike `base::transform()`, `data_modify()`\n#' can be used on grouped data frames, and newly created variables can be directly\n#' used.\n#'\n#' @param data A data frame\n#' @param ... One or more expressions that define the new variable name and the\n#' values or recoding of those new variables. These expressions can be one of:\n#' - A sequence of named, literal expressions, where the left-hand side refers\n#'   to the name of the new variable, while the right-hand side represent the\n#'   values of the new variable. Example: `Sepal.Width = center(Sepal.Width)`.\n#' - A vector of length 1 (which will be recycled to match the number of rows\n#'   in the data), or of same length as the data.\n#' - A variable that contains a value to be used. Example:\n#'   ```r\n#'   a <- \"abc\"\n#'   data_modify(iris, var_abc = a) # var_abc contains \"abc\"\n#'   ```\n#' - An expression can also be provided as string and wrapped in\n#'   `as_expr()`. Example:\n#'   ```r\n#'   data_modify(iris, as_expr(\"Sepal.Width = center(Sepal.Width)\"))\n#'   # or\n#'   a <- \"center(Sepal.Width)\"\n#'   data_modify(iris, Sepal.Width = as_expr(a))\n#'   # or\n#'   a <- \"Sepal.Width = center(Sepal.Width)\"\n#'   data_modify(iris, as_expr(a))\n#'   ```\n#'   Note that `as_expr()` is no real function, which cannot be used outside\n#'   of `data_modify()`, and hence it is not exported nor documented. Rather,\n#'   it is only used for internally processing expressions.\n#' - Using `NULL` as right-hand side removes a variable from the data frame.\n#'   Example: `Petal.Width = NULL`.\n#' - For data frames (including grouped ones), the function `n()` can be used to\n#'   count the number of observations and thereby, for instance, create index\n#'   values by using `id = 1:n()` or `id = 3:(n()+2)` and similar. 
Note that,\n#'   like `as_expr()`, `n()` is also no true function and cannot be used outside\n#'   of `data_modify()`.\n#'\n#' Note that newly created variables can be used in subsequent expressions,\n#' including `.at` or `.if`. See also 'Examples'.\n#'\n#' @param .at A character vector of variable names that should be modified. This\n#' argument is used in combination with the `.modify` argument. Note that only one\n#' of `.at` or `.if` can be provided, but not both at the same time. Newly created\n#' variables in `...` can also be selected, see 'Examples'.\n#' @param .if A function that returns `TRUE` for columns in the data frame where\n#' `.if` applies. This argument is used in combination with the `.modify` argument.\n#' Note that only one of `.at` or `.if` can be provided, but not both at the same\n#' time. Newly created variables in `...` can also be selected, see 'Examples'.\n#' @param .modify A function that modifies the variables defined in `.at` or `.if`.\n#' This argument is used in combination with either the `.at` or the `.if` argument.\n#' Note that the modified variable (i.e. the result from `.modify`) must be either\n#' of length 1 or of same length as the input variable.\n#'\n#' @note `data_modify()` can also be used inside functions. 
However, it is\n#' recommended to pass the recode-expression as character vector or list of\n#' characters.\n#'\n#' @examples\n#' data(efc)\n#' new_efc <- data_modify(\n#'   efc,\n#'   c12hour_c = center(c12hour),\n#'   c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n#'   c12hour_z2 = standardize(c12hour)\n#' )\n#' head(new_efc)\n#'\n#' # using strings instead of literal expressions\n#' new_efc <- data_modify(\n#'   efc,\n#'   as_expr(\"c12hour_c = center(c12hour)\"),\n#'   as_expr(\"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\"),\n#'   as_expr(\"c12hour_z2 = standardize(c12hour)\")\n#' )\n#' head(new_efc)\n#'\n#' # using a character vector, provided a variable\n#' xpr <- c(\n#'   \"c12hour_c = center(c12hour)\",\n#'   \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n#'   \"c12hour_z2 = standardize(c12hour)\"\n#' )\n#' new_efc <- data_modify(efc, as_expr(xpr))\n#' head(new_efc)\n#'\n#' # using character strings, provided as variable\n#' stand <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\n#' new_efc <- data_modify(\n#'   efc,\n#'   c12hour_c = center(c12hour),\n#'   c12hour_z = as_expr(stand)\n#' )\n#' head(new_efc)\n#'\n#' # attributes - in this case, value and variable labels - are preserved\n#' str(new_efc)\n#'\n#' # using `paste()` to build a string-expression\n#' to_standardize <- c(\"Petal.Length\", \"Sepal.Length\")\n#' out <- data_modify(\n#'   iris,\n#'   as_expr(\n#'     paste0(to_standardize, \"_stand = standardize(\", to_standardize, \")\")\n#'   )\n#' )\n#' head(out)\n#'\n#' # overwrite existing variable, remove old variable\n#' out <- data_modify(iris, Petal.Length = 1 / Sepal.Length, Sepal.Length = NULL)\n#' head(out)\n#'\n#' # works on grouped data\n#' grouped_efc <- data_group(efc, \"c172code\")\n#' new_efc <- data_modify(\n#'   grouped_efc,\n#'   c12hour_c = center(c12hour),\n#'   c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n#'   c12hour_z2 = standardize(c12hour),\n#'   id = 1:n()\n#' )\n#' head(new_efc)\n#'\n#' # works from 
inside functions\n#' foo1 <- function(data, ...) {\n#'   head(data_modify(data, ...))\n#' }\n#' foo1(iris, SW_fraction = Sepal.Width / 10)\n#' # or\n#' foo1(iris, as_expr(\"SW_fraction = Sepal.Width / 10\"))\n#'\n#' # also with string arguments, using `as_expr()`\n#' foo2 <- function(data, modification) {\n#'   head(data_modify(data, as_expr(modification)))\n#' }\n#' foo2(iris, \"SW_fraction = Sepal.Width / 10\")\n#'\n#' # modify at specific positions or if condition is met\n#' d <- iris[1:5, ]\n#' data_modify(d, .at = \"Species\", .modify = as.numeric)\n#' data_modify(d, .if = is.factor, .modify = as.numeric)\n#'\n#' # can be combined with dots\n#' data_modify(d, new_length = Petal.Length * 2, .at = \"Species\", .modify = as.numeric)\n#'\n#' # new variables used in `.at` or `.if`\n#' data_modify(\n#'   d,\n#'   new_length = Petal.Length * 2,\n#'   .at = c(\"Petal.Length\", \"new_length\"),\n#'   .modify = round\n#' )\n#'\n#' # combine \"extract_column_names()\" and \".at\" argument\n#' out <- data_modify(\n#'   d,\n#'   .at = extract_column_names(d, select = starts_with(\"Sepal\")),\n#'   .modify = as.factor\n#' )\n#' # \"Sepal.Length\" and \"Sepal.Width\" are now factors\n#' str(out)\n#'\n#' @export\ndata_modify <- function(data, ...) {\n  UseMethod(\"data_modify\")\n}\n\n\n#' @export\ndata_modify.default <- function(data, ...) {\n  insight::format_error(\"`data` must be a data frame.\")\n}\n\n\n#' @rdname data_modify\n#' @export\ndata_modify.data.frame <- function(\n  data,\n  ...,\n  .if = NULL,\n  .at = NULL,\n  .modify = NULL\n) {\n  dots <- eval(substitute(alist(...)))\n\n  # error for data frames with no rows...\n  if (nrow(data) == 0) {\n    insight::format_error(\n      \"`data` is an empty data frame. `data_modify()` only works for data frames with at least one row.\"\n    ) # nolint\n  }\n\n  # check if we have dots, or only at/modify ----\n\n  if (length(dots)) {\n    # Check if dots are named. Usually, all dots should be named, i.e. 
include\n    # the name of the new variable. There's only one exception, if a string is\n    # masked as expression, and this string includes the new name, e.g.\n    #\n    # data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Width\"))\n    # a <- \"sepwid = 2 * Sepal.Width\"\n    # data_modify(iris, as_expr(a))\n    #\n    dots <- .process_unnamed_expressions(dots, data)\n\n    # next, we check for named expression-tags and convert these into regular\n    # expressions, e.g.\n    #\n    # data_modify(iris, sepwid =  = as_expr(\"2 * Sepal.Width\"))\n    # a <- \"2 * Sepal.Width\"\n    # data_modify(iris, sepwid = as_expr(a))\n    #\n    dots <- .process_named_expressions(dots, data)\n\n    for (i in seq_along(dots)) {\n      # create new variable\n      new_variable <- .get_new_dots_variable(dots, i, data)\n      # give informative error when new variable doesn't match number of rows\n      if (\n        !is.null(new_variable) &&\n          length(new_variable) != nrow(data) &&\n          (nrow(data) %% length(new_variable)) != 0\n      ) {\n        insight::format_error(\n          \"New variable has not the same length as the other variables in the data frame and cannot be recycled.\"\n        )\n      }\n      data[[names(dots)[i]]] <- new_variable\n    }\n  }\n\n  # check if we have at/modify ----\n  data <- .modify_at(data, .at, .if, .modify)\n\n  data\n}\n\n\n#' @export\ndata_modify.grouped_df <- function(\n  data,\n  ...,\n  .if = NULL,\n  .at = NULL,\n  .modify = NULL\n) {\n  # we need to evaluate dots here, and pass them with \"do.call\" to\n  # the data.frame method later...\n  dots <- match.call(expand.dots = FALSE)[[\"...\"]]\n\n  # error for data frames with no rows...\n  if (nrow(data) == 0) {\n    insight::format_error(\n      \"`data` is an empty data frame. 
`data_modify()` only works for data frames with at least one row.\"\n    ) # nolint\n  }\n\n  grps <- attr(data, \"groups\", exact = TRUE)\n  grps <- grps[[\".rows\"]]\n  attr_data <- attributes(data)\n\n  # remove conflicting class attributes\n  class_attr <- class(data)\n  data <- as.data.frame(data)\n\n  if (length(dots)) {\n    # check is dots are named. Usually, all dots should be named, i.e. include\n    # the name of the new variable. There's only one exception, if a string is\n    # masked as expression, and this string includes the new name, e.g.\n    #\n    # data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Width\"))\n    # a <- \"sepwid = 2 * Sepal.Width\"\n    # data_modify(iris, as_expr(a))\n    #\n    dots <- .process_unnamed_expressions(dots, data)\n\n    # next, we check for named expression-tags and convert these into regular\n    # expressions, e.g.\n    #\n    # data_modify(iris, sepwid =  = as_expr(\"2 * Sepal.Width\"))\n    # a <- \"2 * Sepal.Width\"\n    # data_modify(iris, sepwid = as_expr(a))\n    #\n    dots <- .process_named_expressions(dots, data)\n  }\n\n  # create new variables as dummys, do for-loop works\n  for (i in names(dots)) {\n    # don't overwrite / fill existing variables with NA,\n    # e.g. 
if we have \"data_modify(iris, Sepal.Length = normalize(Sepal.Length))\"\n    # normalize() won't work when we fill with NA\n    if (!i %in% colnames(data)) {\n      data[[i]] <- NA\n    }\n  }\n\n  # create new variables per group\n  for (rows in grps) {\n    data[rows, ] <- data_modify.data.frame(data[rows, ], ...)\n  }\n\n  # check if we have at/modify ----\n  data <- .modify_at(data, .at, .if, .modify)\n\n  # set back attributes and class\n  data <- .replace_attrs(data, attr_data)\n  class(data) <- class_attr\n  data\n}\n\n\n# expression processing ----------------------------------------------------\n\n.process_unnamed_expressions <- function(dots, data) {\n  # dots are only unnamed, when the full expression is saved in a string,\n  # e.g. data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Width\")).\n  # Thus, we know we *have to* find an expression here, and the string value\n  # *must* contain a name definition. If not, fail. If yes, convert string\n  # into a language expression...\n\n  if (!is.null(names(dots)) && all(nzchar(names(dots)))) {\n    # if all elements are named, return early\n    return(dots)\n  }\n\n  # find which dots are unnamed, check those for expressions\n  if (is.null(names(dots))) {\n    unnamed_dots <- seq_along(dots)\n  } else {\n    unnamed_dots <- which(!nzchar(names(dots)))\n  }\n\n  for (i in rev(unnamed_dots)) {\n    # copy dot-element and convert to string for manipulation\n    dot_element <- dots[[i]]\n    symbol_string <- insight::safe_deparse(dot_element)\n    # sanity check - this may happen when user wants to remove a variable,\n    # e.g. data_modify(iris, as_expr(\"Species = NULL\"))\n    if (is.null(symbol_string)) {\n      next\n    }\n    # we only allow unnamed elements if these are masked as expression. String\n    # values or numeric values require a named element, i.e. 
we can only have\n    # data_modify(iris, newvar = \"a\"), but we cannot have data_modify(iris, \"a\").\n    # For expression, missing name is possible.\n    if (!startsWith(symbol_string, \"as_expr\")) {\n      insight::format_error(paste0(\n        \"A variable name for the expression `\",\n        symbol_string,\n        \"` is missing. \",\n        \"Please use something like `new_name = \",\n        symbol_string,\n        \"`.\"\n      ))\n    }\n    # next, check if the string-expression includes a name for the new variable\n    # therefore, we remove the \"as_expr()\" token\n    if (startsWith(symbol_string, \"as_expr\")) {\n      symbol_string <- insight::trim_ws(\n        gsub(\"as_expr\\\\((.*)\\\\)\", \"\\\\1\", symbol_string)\n      )\n    }\n    # remove c(), split at comma, if we have a vector of expressions\n    if (startsWith(symbol_string, \"c(\")) {\n      symbol_string <- gsub(\"c\\\\((.*)\\\\)\", \"\\\\1\", symbol_string)\n      # only split at highest-level comma\n      pattern <- \",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)\" # suggestion by Co-pilot\n      # Locate commas not inside quotes\n      symbol_string <- insight::trim_ws(unlist(\n        strsplit(symbol_string, pattern, perl = TRUE),\n        use.names = FALSE\n      ))\n    }\n    # check if we have any symbols instead of strings as expression, e.g.\n    # xpr <- \"sepwid = 2 * Sepal.Width\"\n    # data_modify(iris, as_expr(xpr))\n    #\n    # in this case, we need to evaluate the symbol (i.e. convert symbol string\n    # into a language expression and then evaluate)\n    symbol_string <- .evaluate_expression_in_string(symbol_string, data)\n    # check whether we have exact one = sign. We need to have a name definition,\n    # i.e. 
something like \"var = a+b\" - if the string has no \"=\" sign, name is\n    # definitely missing\n    pattern <- \"(?<!=)=(?!=)\"\n    has_names <- grepl(pattern, symbol_string, perl = TRUE)\n    if (!all(has_names)) {\n      insight::format_error(paste0(\n        \"A variable name for the expression `\",\n        symbol_string[!has_names[1]],\n        \"` is missing. \",\n        \"Please use something like `new_name = \",\n        symbol_string[!has_names[1]],\n        \"`.\"\n      ))\n    }\n    # extract names (split at =), separate name from new variable from its\n    # expression. we need this to create a named list of expressions for the dots\n    symbol_string <- lapply(\n      strsplit(symbol_string, \"=\", fixed = TRUE),\n      function(split_result) {\n        # we may have multiple \"=\" signs, e.g. when we have the pattern\n        # \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\". In this case\n        # paste remaining parts to one string again\n        first_part <- split_result[1]\n        second_part <- paste(split_result[-1], collapse = \"=\")\n        insight::trim_ws(c(first_part, second_part))\n      }\n    )\n    # extract names (LHS)\n    symbol_names <- vapply(symbol_string, function(i) i[1], character(1))\n    # extract expressions (RHS)\n    symbol_string <- lapply(symbol_string, function(i) {\n      str2lang(.fix_quotes(i[2]))\n    })\n    names(symbol_string) <- symbol_names\n    # copy to dots... if we have a character vector, one dot element may\n    # return more than one expression elements. 
Thus, we have to insert /\n    # replace the old element by one or more new elements\n    if (length(dots) == 1) {\n      return_value <- symbol_string\n    } else if (i == 1) {\n      return_value <- c(symbol_string, dots[(i + 1):length(dots)])\n    } else if (i == length(dots)) {\n      return_value <- c(dots[1:(i - 1)], symbol_string)\n    } else {\n      return_value <- c(\n        dots[1:(i - 1)],\n        symbol_string,\n        dots[(i + 1):length(dots)]\n      )\n    }\n    dots <- return_value\n  }\n  dots\n}\n\n\n.process_named_expressions <- function(dots, data) {\n  # this is basically a shorter version of \".process_unnamed_expressions()\",\n  # because we don't need to extract the name definition of the string, which\n  # makes the handling easier. See \".process_unnamed_expressions()\" for a more\n  # comprehensive documentation of the single steps.\n\n  for (i in seq_along(dots)) {\n    dot_element <- dots[[i]]\n    symbol_string <- insight::safe_deparse(dot_element)\n    # sanity check, this may happen when user wants to remove a variable\n    # e.g. 
data_modify(iris, Species = NULL)\n    if (is.null(symbol_string)) {\n      next\n    }\n    # extract string-expression, if we have any\n    if (startsWith(symbol_string, \"as_expr\")) {\n      symbol_string <- gsub(\"as_expr\\\\((.*)\\\\)\", \"\\\\1\", symbol_string)\n    } else {\n      # no expression token found\n      symbol_string <- NULL\n    }\n    # here we found an expression token - convert string into a regular expression\n    if (!is.null(symbol_string)) {\n      # check if we have any symbols instead of strings as expression\n      symbol_string <- .evaluate_expression_in_string(symbol_string, data)\n      # remove quotes from strings and save symbol name\n      symbol_string <- .fix_quotes(symbol_string)\n      symbol_name <- names(dots)[i]\n      # convert string into language and replace in dots\n      return_value <- try(str2lang(symbol_string), silent = TRUE)\n      # sanity check - for invalid expressions, like\n      # data_modify(iris, a = as_expr(c(\"1 + 1\", \"2 + 2\")))\n      # we get an error here\n      if (inherits(return_value, \"try-error\")) {\n        insight::format_error(paste0(\n          \"Could not evaluate expression `\",\n          symbol_string[1],\n          \"`. \",\n          \"Please check if it's correctly specified. If you think there's a bug \",\n          \"in `data_modify()`, please file an issue at {.url https://github.com/easystats/datawizard/issues}\"\n        ))\n      }\n      dots[[i]] <- return_value\n      names(dots)[i] <- symbol_name\n    }\n  }\n  dots\n}\n\n\n# helper -------------\n\n.evaluate_expression_in_string <- function(symbol_string, data) {\n  # check if we have any symbols instead of strings as expression, e.g.\n  # xpr <- \"sepwid = 2 * Sepal.Width\"\n  # data_modify(iris, as_expr(xpr))\n  #\n  # in this case, we need to evaluate the symbol (i.e. 
convert symbol string\n  # into a language expression and then evaluate)\n  symbol_string <- unlist(\n    lapply(symbol_string, function(symbol_element) {\n      if (startsWith(symbol_element, \"\\\"\")) {\n        symbol_element\n      } else {\n        return_value <- .dynEval(str2lang(symbol_element))\n        # dynEval might fail if we don't look in data - sanity check\n        if (identical(return_value, symbol_element)) {\n          return_value <- .dynEval(str2lang(symbol_element), data = data)\n        }\n        return_value\n      }\n    }),\n    use.names = FALSE\n  )\n  # now we should have the expression as character string. Next, we\n  # # remove quotes from strings\n  gsub(\"^\\\"(.*)\\\"$\", \"\\\\1\", symbol_string)\n}\n\n\n.fix_quotes <- function(symbol_string) {\n  # if user uses double-quotes inside double-quotes, these are escaped by\n  # \"\\\", e.g. data_modify(iris, foo = as_expr(\"grepl(\\\"a\\\", Species)\"))\n  # In this case, we have double-backslashes, which need to be removed.\n  # Furthermore, to avoid adding back backslashes, we replace by single quotes\n  # Using single quotes inside a string, even if escaped with backslash, is no\n  # problem here. 
Main issue is that if a string is parsed, double-quotes are\n  # *always* escaped, so we just need to make sure we only have single quotes\n  # and then remove all backslashes\n  gsub(\"\\\\\", \"\", gsub(\"\\\"\", \"'\", symbol_string, fixed = TRUE), fixed = TRUE)\n}\n\n\n.modify_at <- function(data, .at, .if, .modify) {\n  # check if \".at\" or \".if\" is defined, but not \".modify\"\n  if (is.null(.modify)) {\n    if (!is.null(.at) || !is.null(.if)) {\n      insight::format_error(\n        \"You need to specify `.modify` when using `.at` or `.if`.\"\n      )\n    }\n    return(data)\n  }\n  # make sure \"modify\" is a function\n  if (!is.function(.modify)) {\n    insight::format_error(\"`.modify` must be a function.\")\n  }\n  # make sure either .at or .if is defined, not both\n  if (!is.null(.at) && !is.null(.if)) {\n    insight::format_error(\n      \"You cannot use both `.at` and `.if` at the same time.\"\n    )\n  }\n  # make sure at least one of .at or .if is defined\n  if (is.null(.at) && is.null(.if)) {\n    insight::format_error(\"You need to specify either `.at` or `.if`.\")\n  }\n\n  column_names <- colnames(data)\n\n  # if we have \".if\" defined, specify \".at\"\n  if (!is.null(.if)) {\n    .at <- column_names[vapply(data, .if, logical(1))]\n  }\n  # check for valid defined column names\n  if (!all(.at %in% column_names)) {\n    not_found <- .at[!.at %in% column_names]\n    insight::format_error(\n      paste0(\n        \"Variable\",\n        ifelse(length(not_found) > 1, \"s \", \" \"),\n        text_concatenate(not_found, enclose = \"\\\"\"),\n        ifelse(length(not_found) > 1, \" were\", \" was\"),\n        \" not found in the dataset.\"\n      ),\n      .misspelled_string(\n        column_names,\n        not_found,\n        \"Possibly misspelled or not yet defined?\"\n      )\n    )\n  }\n  for (i in .at) {\n    result <- tryCatch(\n      .modify(data[[i]]),\n      warning = function(e) e,\n      error = function(e) e\n    )\n    if 
(inherits(result, c(\"error\", \"warning\"))) {\n      insight::format_error(\n        paste0(\"Error in modifying variable \\\"\", i, \"\\\": \", result$message),\n        \"Please check if you correctly specified the `.modify` function.\"\n      )\n    } else {\n      data[[i]] <- result\n    }\n  }\n\n  data\n}\n\n\n.get_new_dots_variable <- function(dots, i, data) {\n  # iterate expressions for new variables\n  symbol <- dots[[i]]\n\n  # we evaluate the content of \"symbol\", hence, \"eval_symbol\" either contains\n  # the values of the expression, or the expression itself as string\n  eval_symbol <- .dynEval(symbol, ifnotfound = NULL, data = data)\n\n  # finally, we can evaluate expression and get values for new variables\n  symbol_string <- insight::safe_deparse(symbol)\n  if (!is.null(symbol_string) && all(symbol_string == \"n()\")) {\n    # \"special\" functions - using \"n()\" just returns number of rows\n    new_variable <- nrow(data)\n  } else if (\n    !is.null(symbol_string) &&\n      length(symbol_string) == 1 &&\n      grepl(\"\\\\bn\\\\(\\\\)\", symbol_string)\n  ) {\n    # \"special\" functions, like \"1:n()\" or similar - but not \"1:fun()\"\n    symbol_string <- str2lang(gsub(\n      \"n()\",\n      \"nrow(data)\",\n      symbol_string,\n      fixed = TRUE\n    ))\n    new_variable <- try(with(data, eval(symbol_string)), silent = TRUE)\n  } else {\n    # evaluate symbol\n    new_variable <- try(with(data, eval(symbol)), silent = TRUE)\n    # if evaluation fails, we have a value - and directly use it\n    if (inherits(new_variable, \"try-error\") && !is.null(eval_symbol)) {\n      new_variable <- eval_symbol\n    }\n  }\n\n  # successful, or any errors, like misspelled variable name?\n  if (inherits(new_variable, \"try-error\")) {\n    # in which step did error happen?\n    step_number <- switch(\n      as.character(i),\n      \"1\" = \"the first expression\",\n      \"2\" = \"the second expression\",\n      \"3\" = \"the third expression\",\n     
 paste(\"expression\", i)\n    )\n    step_msg <- paste0(\"There was an error in \", step_number, \".\")\n    # try to find out which variable was the cause for the error\n    error_msg <- attributes(new_variable)$condition$message\n    if (grepl(\"object '(.*)' not found\", error_msg)) {\n      error_var <- gsub(\"object '(.*)' not found\", \"\\\\1\", error_msg)\n      insight::format_error(\n        paste0(\n          step_msg,\n          \" Variable \\\"\",\n          error_var,\n          \"\\\" was not found in the dataset or in the environment.\"\n        ),\n        .misspelled_string(\n          colnames(data),\n          error_var,\n          \"Possibly misspelled or not yet defined?\"\n        )\n      )\n    } else {\n      insight::format_error(paste0(\n        step_msg,\n        \" \",\n        insight::format_capitalize(error_msg),\n        \". Possibly misspelled or not yet defined?\"\n      ))\n    }\n  }\n\n  new_variable\n}\n"
  },
  {
    "path": "R/data_partition.R",
    "content": "#' Partition data\n#'\n#' Creates data partitions (for instance, a training and a test set) based on a\n#' data frame that can also be stratified (i.e., evenly spread a given factor)\n#' using the `by` argument.\n#'\n#' @inheritParams data_rename\n#' @param proportion Scalar (between 0 and 1) or numeric vector, indicating the\n#'   proportion(s) of the training set(s). The sum of `proportion` must not be\n#'   greater than 1. The remaining part will be used for the test set.\n#' @param by A character vector indicating the name(s) of the column(s) used\n#'   for stratified partitioning.\n#' @param seed A random number generator seed. Enter an integer (e.g. 123) so\n#'   that the random sampling will be the same each time you run the function.\n#' @param row_id Character string, indicating the name of the column that\n#'   contains the row-id's.\n#' @param verbose Toggle messages and warnings.\n#'\n#' @return A list of data frames. The list includes one training set per given\n#'   proportion and the remaining data as test set. 
List elements of training
#'   sets are named after the given proportions (e.g., `$p_0.7`), the test set
#'   is named `$test`.
#'
#' @examples
#' data(iris)
#' out <- data_partition(iris, proportion = 0.9)
#' out$test
#' nrow(out$p_0.9)
#'
#' # Stratify by group (equal proportions of each species)
#' out <- data_partition(iris, proportion = 0.9, by = "Species")
#' out$test
#'
#' # Create multiple partitions
#' out <- data_partition(iris, proportion = c(0.3, 0.3))
#' lapply(out, head)
#'
#' # Create multiple partitions, stratified by group - 30% equally sampled
#' # from species in first training set, 50% in second training set and
#' # remaining 20% equally sampled from each species in test set.
#' out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species")
#' lapply(out, function(i) table(i$Species))
#'
#' @inherit data_rename seealso
#' @export
data_partition <- function(
  data,
  proportion = 0.7,
  by = NULL,
  seed = NULL,
  row_id = ".row_id",
  verbose = TRUE,
  ...
) {
  # validation checks
  data <- .coerce_to_dataframe(data)

  if (sum(proportion) > 1) {
    insight::format_error("Sum of `proportion` cannot be higher than 1.")
  }
  if (any(proportion < 0)) {
    insight::format_error("Values in `proportion` cannot be negative.")
  }
  if (sum(proportion) == 1 && isTRUE(verbose)) {
    insight::format_warning(
      "Proportions of sampled training sets (`proportion`) sums up to 1, so no test set will be generated."
    )
  }

  if (is.null(row_id)) {
    row_id <- ".row_id"
  }

  # check that name of row-id doesn't exist to prevent existing data
  # from overwriting. create new unique name for row-id then...
  if (row_id %in% colnames(data)) {
    if (isTRUE(verbose)) {
      insight::format_warning(
        paste0("A variable named \"", row_id, "\" already exists."),
        "Changing the value of `row_id` to a unique variable name now."
      )
    }
    unique_names <- make.unique(c(colnames(data), row_id), sep = "_")
    row_id <- unique_names[length(unique_names)]
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # add row-id column
  data[[row_id]] <- seq_len(nrow(data))

  # Create list of data groups. We generally lapply over list of
  # sampled row-id's by group, thus, we even create a list if not grouped.
  if (is.null(by)) {
    indices_list <- list(seq_len(nrow(data)))
  } else {
    # else, split by group(s) and extract row-ids per group
    indices_list <- lapply(
      split(data, data[by]),
      data_extract,
      select = row_id,
      as_data_frame = FALSE
    )
  }

  # iterate over (grouped) row-id's
  training_sets <- lapply(indices_list, function(i) {
    # return value, list of data frames
    d <- list()

    # row-id's by groups
    indices <- i

    # check length of group (= data)
    n <- length(indices)

    # iterate proportions. we use for/next, so we can change
    # the "indices" variable, where we remove already sampled id's
    for (p in proportion) {
      # training-id's, sampled from id's per group - size is % within each
      # group. FIX: `sample(indices, size)` would sample from `1:indices`
      # when only one id is left, so index into `indices` explicitly; also
      # cap `size` so rounding can never exceed the remaining pool
      size <- min(round(n * p), length(indices))
      training <- sort(indices[sample.int(length(indices), size)])

      # remove already sampled id's from group-indices
      indices <- setdiff(indices, training)

      # each training set data frame as one list element
      d[[length(d) + 1]] <- data[training, ]
    }
    d
  })

  # we need to move all list elements one level higher.
  if (is.null(by)) {
    training_sets <- training_sets[[1]]
  } else {
    # for grouped training sets, we need to row-bind all sampled training
    # sets from each group. currently, we have a list of data frames,
    # grouped by "group"; but we want one data frame per proportion that
    # contains sampled rows from all groups.
    training_sets <- lapply(seq_along(proportion), function(p) {
      do.call(rbind, lapply(training_sets, function(i) i[[p]]))
    })
  }

  # use proportions as element names
  names(training_sets) <- sprintf("p_%g", proportion)

  # remove all training set id's from data, add remaining data (= test set)
  all_ids <- lapply(
    training_sets,
    data_extract,
    select = row_id,
    as_data_frame = FALSE
  )
  sampled_ids <- unlist(all_ids, use.names = FALSE)

  # FIX: `data[-integer(0), ]` returns an *empty* data frame, not the full
  # one, so only use negative indexing when at least one row was sampled
  if (length(sampled_ids) > 0L) {
    test_set <- data[-sampled_ids, ]
  } else {
    test_set <- data
  }
  out <- c(training_sets, list(test = test_set))

  lapply(out, `row.names<-`, NULL)
}
  },
  {
    "path": "R/data_peek.R",
    #' @title Peek at values and type of variables in a data frame
#' @name data_peek
#'
#' @description This function creates a table of a data frame, showing all
#' column names, variable types and the first values (as many as fit into
#' the screen).
#'
#' @param x A data frame.
#' @param width Maximum width of line length to display. If `NULL`, width will
#' be determined using `options()$width`.
#' @param ... not used.
#' @inheritParams extract_column_names
#'
#' @note To show only specific or a limited number of variables, use the
#' `select` argument, e.g. `select = 1:5` to show only the first five variables.
#'
#' @return A data frame with three columns, containing information about
#' the name, type and first values of the input data frame.
#'
#' @examples
#' data(efc)
#' data_peek(efc)
#' # show variables two to four
#' data_peek(efc, select = 2:4)
#' @export
data_peek <- function(x, ...) {
  UseMethod("data_peek")
}


#' @rdname data_peek
#' @export
data_peek.data.frame <- function(
  x,
  select = NULL,
  exclude = NULL,
  ignore_case = FALSE,
  regex = FALSE,
  width = NULL,
  verbose = TRUE,
  ...
) {
  # evaluate arguments
  select <- .select_nse(
    select,
    x,
    exclude,
    ignore_case,
    regex = regex,
    verbose = verbose
  )
  # one row of meta information per selected column
  out <- do.call(
    rbind,
    lapply(select, function(i) {
      .data_peek(x, i, width, verbose = verbose, ...)
    })
  )

  class(out) <- c("dw_data_peek", class(out))
  attr(out, "n_cols") <- ncol(x)
  attr(out, "n_rows") <- nrow(x)
  # scalar decision - use `if`/`else`, not the vectorized `ifelse()`
  attr(out, "max_width") <- if (is.null(width)) {
    0.9 * getOption("width")
  } else {
    width
  }

  out
}


# methods -----------------

#' @export
print.dw_data_peek <- function(x, ...) {
  x <- format(x, ...)
  caption <- sprintf(
    "Data frame with %i rows and %i variables",
    attributes(x)$n_rows,
    attributes(x)$n_cols
  )
  cat(insight::export_table(x, align = "lll", caption = caption, ...))
}

#' @export
print_md.dw_data_peek <- function(x, ...) {
  x <- format(x, ...)
  caption <- sprintf(
    "Data frame with %i rows and %i variables",
    attributes(x)$n_rows,
    attributes(x)$n_cols
  )
  insight::export_table(
    x,
    align = "lll",
    format = "markdown",
    caption = caption,
    ...
  )
}

#' @export
print_html.dw_data_peek <- function(x, ...) {
  x <- format(x, ...)
  caption <- sprintf(
    "Data frame with %i rows and %i variables",
    attributes(x)$n_rows,
    attributes(x)$n_cols
  )
  insight::export_table(
    x,
    align = "lll",
    format = "html",
    caption = caption,
    ...
  )
}

#' @export
format.dw_data_peek <- function(x, ...) {
  width_col1 <- max(nchar(x$Variable))
  width_col2 <- max(nchar(x$Type))
  max_width <- attributes(x)$max_width
  if (is.null(max_width)) {
    max_width <- 0.9 * getOption("width")
  }
  width_col3 <- max_width - (width_col1 + width_col2 + 10) # 10 = separator chars in table

  # shorten value-string (substr() start index is 1-based)
  x$Values <- substr(x$Values, 1, width_col3)
  # make sure we have a clear truncation, at last "comma"
  x$Values <- gsub("(.+)(,.+)$", "\\1", x$Values)
  # add "..."
  x$Values <- paste0(x$Values, ", ...")

  x
}


# helper -----------------

# build the one-row summary (name, type, first values) for a single column
.data_peek <- function(x, variable, width = NULL, verbose = TRUE, ...) {
  v_name <- variable
  v_type <- .variable_type(x[[variable]])
  v_type[v_type == "categorical"] <- "factor"

  max_width <- if (is.null(width)) 0.9 * getOption("width") else width
  # FIX: only take as many values as the column actually has - indexing
  # beyond the vector's length would pad the preview with spurious `NA`s
  n_values <- min(length(x[[variable]]), max_width)
  v_values <- toString(x[[variable]][seq_len(n_values)])

  data.frame(
    Variable = v_name,
    Type = v_type,
    Values = v_values,
    stringsAsFactors = FALSE
  )
}
  },
  {
    "path": "R/data_read.R",
    "content": "#' @title Read (import) data files from various sources\n#' @name data_read\n#'\n#' @description\n#' This functions imports data from various file types. It is a small wrapper\n#' around `haven::read_spss()`, `haven::read_stata()`, `haven::read_sas()`,\n#' `readxl::read_excel()` and `data.table::fread()` resp. `readr::read_delim()`\n#' (the latter if package **data.table** is not installed). Thus, supported file\n#' types for importing data are data files from SPSS, SAS or Stata, Excel files\n#' or text files (like '.csv' files). All other file types are passed to\n#' `rio::import()`. `data_write()` works in a similar way.\n#'\n#' @param path Character string, the file path to the data file.\n#' @param path_catalog Character string, path to the catalog file. Only relevant\n#' for SAS data files.\n#' @param encoding The character encoding used for the file. Usually not needed.\n#' @param convert_factors If `TRUE` (default), numeric variables, where all\n#' values have a value label, are assumed to be categorical and converted into\n#' factors. If `FALSE`, no variable types are guessed and no conversion of\n#' numeric variables into factors will be performed. For `data_read()`, this\n#' argument only applies to file types with *labelled data*, e.g. files from\n#' SPSS, SAS or Stata. See also section 'Differences to other packages'. For\n#' `data_write()`, this argument only applies to the text (e.g. `.txt` or\n#' `.csv`) or spreadsheet file formats (like `.xlsx`). Converting to factors\n#' might be useful for these formats because labelled numeric variables are then\n#' converted into factors and exported as character columns - else, value labels\n#' would be lost and only numeric values are written to the file.\n#' @param password Password for data encryption. If not `NULL`, the data will be\n#' encrypted (for `data_write()`) or decrypted (for `data_read()`) using the\n#' provided password. 
Encryption is currently only supported for R file formats\n#' (`.rds`, `.rda` and `.rdata`). See the section \"Data encryption\" below for more\n#' information on the encryption method used.\n#' @param verbose Toggle warnings and messages.\n#' @param ... Arguments passed to the related `read_*()` or `write_*()` functions.\n#'\n#' @return A data frame.\n#'\n#' @section Supported file types:\n#' - `data_read()` is a wrapper around the **haven**, **data.table**, **readr**\n#'   **readxl**, **nanoparquet** and **rio** packages. Currently supported file\n#'   types are `.txt`, `.csv`, `.xls`, `.xlsx`, `.sav`, `.por`, `.dta`, `.sas`,\n#'   `.rda`, `.parquet`, `.rdata`, and `.rds` (and related files). All other file\n#'   types are passed to `rio::import()`.\n#' - `data_write()` is a wrapper around **haven**, **readr**, **nanoparquet**,\n#'   and **rio** packages, and supports writing files into all formats supported\n#'   by these packages.\n#'\n#' @section Compressed files (zip) and URLs:\n#' `data_read()` can also read the above mentioned files from URLs or from\n#' inside zip-compressed files. Thus, `path` can also be a URL to a file like\n#' `\"http://www.url.com/file.csv\"`. When `path` points to a zip-compressed file,\n#' and there are multiple files inside the zip-archive, then the first supported\n#' file is extracted and loaded.\n#'\n#' @section General behaviour:\n#' `data_read()` detects the appropriate `read_*()` function based on the\n#' file-extension of the data file. Thus, in most cases it should be enough to\n#' only specify the `path` argument. However, if more control is needed, all\n#' arguments in `...` are passed down to the related `read_*()` function. The\n#' same applies to `data_write()`, i.e. based on the file extension provided in\n#' `path`, the appropriate `write_*()` function is used automatically.\n#'\n#' @section SPSS specific behaviour:\n#' `data_read()` does *not* import user-defined (\"tagged\") `NA` values from\n#' SPSS, i.e. 
argument `user_na` is always set to `FALSE` when importing SPSS\n#' data with the **haven** package. Use `convert_to_na()` to define missing\n#' values in the imported data, if necessary. Furthermore, `data_write()`\n#' compresses SPSS files by default. If this causes problems with (older) SPSS\n#' versions, use `compress = \"none\"`, for example\n#' `data_write(data, \"myfile.sav\", compress = \"none\")`.\n#'\n#' @section Differences to other packages that read foreign data formats:\n#' `data_read()` is most comparable to `rio::import()`. For data files from\n#' SPSS, SAS or Stata, which support labelled data, variables are converted into\n#' their most appropriate type. The major difference to `rio::import()` is for\n#' data files from SPSS, SAS, or Stata, i.e. file types that support\n#' *labelled data*. `data_read()` automatically converts fully labelled numeric\n#' variables into factors, where imported value labels will be set as factor\n#' levels. If a numeric variable has _no_ value labels or less value labels than\n#' values, it is not converted to factor. In this case, value labels are\n#' preserved as `\"labels\"` attribute. Character vectors are preserved. Use\n#' `convert_factors = FALSE` to remove the automatic conversion of numeric\n#' variables to factors.\n#'\n#' @section Data encryption:\n#' `data_read()` and `data_write()` support data encryption for R file formats\n#' (`.rds`, `.rda` and `.rdata`). To encrypt a file, provide a password to the\n#' `password` argument in `data_write()`. To decrypt the file, provide the same\n#' password to `data_read()`. The encryption is based on the **openssl** package\n#' and uses the AES-GCM algorithm (see `?openssl::aes_gcm_encrypt`) with a\n#' 256-bit key (see `?openssl::sha256`). Thus, data can also be decrypted without\n#' relying on the **datawizard** package, e.g. 
using following code:
#'
#' ```
#' encrypted_data <- readRDS(datafile)
#' key <- openssl::sha256(charToRaw("<password>"))
#' out <- openssl::aes_gcm_decrypt(encrypted_data, key = key)
#' decrypted_data <- unserialize(out)
#' ```
#'
#' **Warning:** Do not lose your `password`, else you will not be able to
#' decrypt the data again!
#'
#' @export
data_read <- function(
  path,
  path_catalog = NULL,
  encoding = NULL,
  convert_factors = TRUE,
  password = NULL,
  verbose = TRUE,
  ...
) {
  # extract first valid file from zip-file
  if (identical(.file_ext(path), "zip")) {
    path <- .extract_zip(path)
  }

  # check for valid file type
  file_type <- .file_ext(path)
  if (!is.character(file_type) || file_type == "") {
    insight::format_error(
      "Could not detect file type. The `path` argument has no file extension.",
      "Please provide a file path including extension, like \"myfile.csv\" or \"c:/Users/Default/myfile.sav\"."
    )
  }

  # read data - dispatch on file extension; anything unknown goes to rio
  out <- switch(
    file_type,
    txt = ,
    csv = .read_text(path, encoding, verbose, ...),
    rda = ,
    rdata = .read_base_rda(path, file_type, password, verbose, ...),
    rds = .read_base_rds(path, password, verbose, ...),
    xls = ,
    xlsx = .read_excel(path, encoding, verbose, ...),
    sav = ,
    por = .read_spss(path, encoding, convert_factors, verbose, ...),
    dta = .read_stata(path, encoding, convert_factors, verbose, ...),
    sas7bdat = .read_sas(
      path,
      path_catalog,
      encoding,
      convert_factors,
      verbose,
      ...
    ),
    parquet = .read_parquet(path, verbose, ...),
    .read_unknown(path, file_type, verbose, ...)
  )

  # tell user about empty columns
  if (verbose) {
    empty_cols <- empty_columns(out)
    # only message if we actually have empty columns
    if (length(empty_cols)) {
      insight::format_alert(
        sprintf("Following %i variables are empty:", length(empty_cols)),
        text_concatenate(names(empty_cols)),
        "\nUse `remove_empty_columns()` to remove them from the data frame."
      )
    }
  }

  out
}


# helper -----------------------

# return the lower-cased file extension, or "" when there is none
.file_ext <- function(x) {
  pos <- regexpr("\\.([[:alnum:]]+)$", x)
  ifelse(pos > -1L, tolower(substring(x, pos + 1L)), "")
}


# unzip the archive into a temp dir and return the path to the first
# file with a supported extension; errors when none is found
.extract_zip <- function(path) {
  files <- utils::unzip(path, list = TRUE)
  files_ext <- vapply(files$Name, .file_ext, FUN.VALUE = character(1L))

  supported_filetypes <- c("txt", "csv", "xls", "xlsx", "sav", "por", "dta")
  dest <- files$Name[which(files_ext %in% supported_filetypes)]

  if (length(dest) > 0) {
    d <- tempfile()
    dir.create(d)
    utils::unzip(path, exdir = d)
    path <- file.path(d, dest[1])
  } else {
    insight::format_error(
      "The zip-file does not contain any supported file types."
    )
  }

  path
}


# process imported data from SPSS, SAS or Stata -----------------------

# convert fully-labelled variables to factors (when `convert_factors` is
# TRUE), else just strip haven's class attributes; always returns a plain
# data frame
.post_process_imported_data <- function(x, convert_factors, verbose) {
  # user may decide whether we automatically detect variable type or not
  if (isTRUE(convert_factors)) {
    if (verbose) {
      msg <- "Variables where all values have associated labels are now converted into factors. If this is not intended, use `convert_factors = FALSE`." # nolint
      insight::format_alert(msg)
    }
    x[] <- lapply(x, function(i) {
      # only proceed if not all missing
      if (!all(is.na(i))) {
        # save labels
        value_labels <- attr(i, "labels", exact = TRUE)
        variable_labels <- attr(i, "label", exact = TRUE)

        # filter, so only matching value labels remain
        value_labels <- value_labels[value_labels %in% unique(i)]

        # guess variable type
        if (is.character(i)) {
          # we need this to drop haven-specific class attributes
          i <- as.character(i)
        } else if (
          !is.null(value_labels) && length(value_labels) == insight::n_unique(i)
        ) {
          # if all values are labelled, we assume factor. Use labels as levels
          if (is.numeric(i)) {
            i <- factor(i, labels = names(value_labels))
          } else {
            i <- factor(as.character(i), labels = names(value_labels))
          }
          value_labels <- NULL
          attr(i, "converted_to_factor") <- TRUE
        } else {
          # else, fall back to numeric or factor
          i <- as.numeric(i)
        }

        # drop unused value labels
        value_labels <- value_labels[value_labels %in% unique(i)]
        if (length(value_labels) > 0L) {
          attr(i, "labels") <- value_labels
        }

        # add back variable label
        attr(i, "label") <- variable_labels
      }
      i
    })
    # tell user how many variables were converted
    if (verbose) {
      cnt <- sum(vapply(
        x,
        function(i) isTRUE(attributes(i)$converted_to_factor),
        TRUE
      ))
      msg <- sprintf(
        "%i out of %i variables were fully labelled and converted into factors.",
        cnt,
        ncol(x)
      )
      insight::format_alert(msg)
    }
  } else {
    # drop haven class attributes
    x[] <- lapply(x, function(i) {
      # save labels
      class(i) <- setdiff(class(i), c("haven_labelled", "vctrs_vctr"))
      i
    })
  }

  class(x) <- "data.frame"
  x
}


# read functions -----------------------

.read_spss <- function(path, encoding, convert_factors, verbose, ...) {
  insight::check_if_installed(
    "haven",
    reason = paste0("to read files of type '", .file_ext(path), "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  out <- haven::read_sav(file = path, encoding = encoding, user_na = FALSE, ...)
  .post_process_imported_data(out, convert_factors, verbose)
}


.read_stata <- function(path, encoding, convert_factors, verbose, ...) {
  insight::check_if_installed(
    "haven",
    reason = paste0("to read files of type '", .file_ext(path), "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  out <- haven::read_dta(file = path, encoding = encoding, ...)
  .post_process_imported_data(out, convert_factors, verbose)
}


.read_sas <- function(
  path,
  path_catalog,
  encoding,
  convert_factors,
  verbose,
  ...
) {
  insight::check_if_installed(
    "haven",
    reason = paste0("to read files of type '", .file_ext(path), "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  out <- haven::read_sas(
    data_file = path,
    catalog_file = path_catalog,
    encoding = encoding,
    ...
  )
  .post_process_imported_data(out, convert_factors, verbose)
}


.read_excel <- function(path, encoding, verbose, ...) {
  insight::check_if_installed(
    "readxl",
    reason = paste0("to read files of type '", .file_ext(path), "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  out <- readxl::read_excel(path, ...)
  class(out) <- "data.frame"
  out
}


.read_text <- function(path, encoding, verbose, ...) {
  if (insight::check_if_installed("data.table", quietly = TRUE)) {
    # FIX: the "Reading data..." message was only shown on the readr
    # fallback; show it on the data.table fast path as well, consistent
    # with all other read functions
    if (verbose) {
      insight::format_alert("Reading data...")
    }
    # set proper default encoding-value for fread
    if (is.null(encoding)) {
      encoding <- "unknown"
    }
    out <- data.table::fread(input = path, encoding = encoding, ...)
    return(as.data.frame(out))
  }

  insight::check_if_installed(
    "readr",
    reason = paste0("to read files of type '", .file_ext(path), "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  out <- readr::read_delim(path, ...)
  as.data.frame(out)
}


.read_unknown <- function(path, file_type, verbose, ...) {
  insight::check_if_installed(
    "rio",
    reason = paste0("to read files of type '", file_type, "'")
  )
  if (verbose) {
    insight::format_alert("Reading data...")
  }
  # set up arguments. for RDS, we set trust = TRUE, to avoid warnings
  rio_args <- list(file = path)
  # check if we have RDS, and if so, add trust = TRUE
  if (file_type %in% c("rds", "rdata", "rda")) {
    rio_args$trust <- TRUE
  }
  out <- do.call(rio::import, c(rio_args, list(...)))

  # check if loaded file is a data frame, or not (e.g. model objects)
  # it returns `NULL` if the file is no valid data file that contains a data
  # frame, or cannot be coerced to a data frame. Else, if it was a data
  # frame or could be coerced into one, the (new) data frame is returned. In
  # this case, we overwrite "out", else we keep its original object.
  valid_data_object <- .get_data_from_loaded_file(out, verbose)
  # if file could be coerced to a data frame, overwrite out
  if (!is.null(valid_data_object)) {
    out <- valid_data_object
  }

  out
}


.read_base_rda <- function(path, file_type, password, verbose = TRUE, ...) {
  if (verbose) {
    insight::format_alert("Reading data...")
  }

  # check URLs
  path <- .check_path_url(path, file_type)

  # since RData and rda can keep multiple files, we load them into a
  # new environment and return them as list object then
  env <- new.env()
  load(file = path, envir = env)

  # if the RData file contains more than one object, we don't check the output
  # but just return everything
  # NOTE(review): this early return also skips decryption - multi-object
  # archives are returned as-is even when `password` is set; confirm intended
  if (length(ls(env)) > 1) {
    if (verbose) {
      insight::format_alert(
        "File contained more than one object, returning all objects."
      )
    }
    return(as.list(env))
  }

  # else, retrieve loaded object
  out <- get(ls(env)[1], env)

  # data decryption
  out <- .data_decryption(out, password)

  # check if loaded file is a data frame, or not (e.g. model objects)
  # it returns `NULL` if the file is no valid data file that contains a data
  # frame, or cannot be coerced to a data frame. Else, if it was a data
  # frame or could be coerced into one, the (new) data frame is returned. In
  # this case, we overwrite "out", else we keep its original object.
  valid_data_object <- .get_data_from_loaded_file(out, verbose)
  # if file could be coerced to a data frame, overwrite out
  if (!is.null(valid_data_object)) {
    out <- valid_data_object
  }

  out
}


.read_base_rds <- function(path, password, verbose = TRUE, ...) {
  if (verbose) {
    insight::format_alert("Reading data...")
  }

  # check URLs
  path <- .check_path_url(path, file_type = "rds")
  out <- readRDS(file = path)

  # data decryption
  out <- .data_decryption(out, password)

  # check if loaded file is a data frame, or not (e.g. model objects)
  # it returns `NULL` if the file is no valid data file that contains a data
  # frame, or cannot be coerced to a data frame. Else, if it was a data
  # frame or could be coerced into one, the (new) data frame is returned. In
  # this case, we overwrite "out", else we keep its original object.
  valid_data_object <- .get_data_from_loaded_file(out, verbose)
  # if file could be coerced to a data frame, overwrite out
  if (!is.null(valid_data_object)) {
    out <- valid_data_object
  }

  out
}


.read_parquet <- function(path, verbose = TRUE, ...) {
  # requires nanoparquet package
  insight::check_if_installed("nanoparquet")

  if (verbose) {
    insight::format_alert("Reading data...")
  }

  # check URLs
  path <- .check_path_url(path, file_type = "parquet")
  out <- nanoparquet::read_parquet(file = path, ...)
  as.data.frame(out)
}


# check input helper --------------------------------------------------------

# for URLs, we need to download the file and save it locally
.check_path_url <- function(path, file_type) {
  url_pattern <- "^(https?|ftp)://(.*)"
  # check if file path is an URL
  if (grepl(url_pattern, path)) {
    insight::check_if_installed("curl")
    if (curl::has_internet()) {
      # if yes, create temp file and save file locally
      temp_file <- tempfile(fileext = paste0(".", file_type))
      download <- curl::curl_fetch_memory(path)
      writeBin(object = download$content, con = temp_file)
      # return path to temp file
      path <- temp_file
    } else {
      insight::format_error(
        "No internet connection detected. Could not download file from URL."
      )
    }
  }
  path
}


.get_data_from_loaded_file <- function(out, verbose = TRUE) {
  # it is also possible to read in pre-compiled model objects with data_read()
  # in this case, just return as is. We do this check before we check with
  # "is.data.frame()", because some models (like brmsfit) have an `as.data.frame()`
  # method, which coerces the model object into a data frame, which is likely to
  # be not intentional
  if (insight::is_model(out)) {
    if (verbose) {
      insight::format_alert(
        paste0(
          "Imported file is a regression model object of class \"",
          class(out)[1],
          "\"."
        ),
        "Returning file as is."
      )
    }
    return(NULL)
  }

  # for "unknown" data formats (like .RDS), which still can be imported via
  # "rio::import()", we must check whether we actually have a data frame or
  # not. Else, tell user.
  if (!is.data.frame(out)) {
    tmp <- tryCatch(
      as.data.frame(out, stringsAsFactors = FALSE),
      error = function(e) NULL
    )
    if (is.null(tmp)) {
      if (verbose) {
        insight::format_warning(
          paste0(
            "Imported file is no data frame, but of class \"",
            class(out)[1],
            "\"."
          ),
          "Returning file as is. Please check if importing this file was intended."
        )
      }
      return(NULL)
    }
    out <- tmp
  }
  out
}

# decrypt data ---------------------------------

.data_decryption <- function(data, password = NULL) {
  # check if data should be decrypted
  if (!is.null(password)) {
    .validate_password(password)
    data <- .decrypt_data(data, password)
  }
  data
}

.decrypt_data <- function(data, password = NULL) {
  insight::check_if_installed("openssl", "for data decryption")
  # it is important to remember the phrase! else, you cannot decrypt the data
  passphrase <- charToRaw(password)
  key <- openssl::sha256(passphrase)
  # decrypt the data. in case of wrong password, `unserialize()` errors
  out <- tryCatch(
    unserialize(openssl::aes_gcm_decrypt(data, key = key)),
    error = function(e) NULL
  )
  # check if we had encrypted data at all?
  if (is.null(out)) {
    insight::format_error(
      "File does not appear to be encrypted with {datawizard}, or you provided the wrong password."
    )
  }
  out
}
  },
  {
    "path": "R/data_relocate.R",
    #' @title Relocate (reorder) columns of a data frame
#' @name data_relocate
#'
#' @description
#' `data_relocate()` will reorder columns to specific positions, indicated by
#' `before` or `after`. `data_reorder()` will instead move selected columns to
#' the beginning of a data frame. Finally, `data_remove()` removes columns
#' from a data frame. All functions support select-helpers that allow flexible
#' specification of a search pattern to find matching columns, which should
#' be reordered or removed.
#'
#' @param data A data frame.
#' @param before,after Destination of columns. Supplying neither will move
#'   columns to the left-hand side; specifying both is an error. Can be a
#'   character vector, indicating the name of the destination column, or a
#'   numeric value, indicating the index number of the destination column.
#'   If `-1`, will be added before or after the last column.
#' @inheritParams extract_column_names
#' @inheritParams data_rename
#'
#' @inherit data_rename seealso
#'
#' @return A data frame with reordered columns.
#'
#' @examples
#' # Reorder columns
#' head(data_relocate(iris, select = "Species", before = "Sepal.Length"))
#' head(data_relocate(iris, select = "Species", before = "Sepal.Width"))
#' head(data_relocate(iris, select = "Sepal.Width", after = "Species"))
#' # which is same as
#' head(data_relocate(iris, select = "Sepal.Width", after = -1))
#'
#' # Reorder multiple columns
#' head(data_relocate(iris, select = c("Species", "Petal.Length"), after = "Sepal.Width"))
#' # which is same as
#' head(data_relocate(iris, select = c("Species", "Petal.Length"), after = 2))
#'
#' # Reorder columns
#' head(data_reorder(iris, c("Species", "Sepal.Length")))
#'
#' @export
data_relocate <- function(
  data,
  select,
  before = NULL,
  after = NULL,
  ignore_case = FALSE,
  regex = FALSE,
  verbose = TRUE,
  ...
) {
  # Sanitize
  if (!is.null(before) && !is.null(after)) {
    insight::format_error("You must supply only one of `before` or `after`.")
  }

  # allow numeric values
  if (!is.null(before) && is.numeric(before)) {
    if (before == -1) {
      before <- names(data)[ncol(data)]
    } else if (before >= 1 && before <= ncol(data)) {
      before <- names(data)[before]
    } else {
      insight::format_error("No valid position defined in `before`.")
    }
  }

  # allow numeric values
  if (!is.null(after) && is.numeric(after)) {
    if (after == -1) {
      after <- names(data)[ncol(data)]
    } else if (after >= 1 && after <= ncol(data)) {
      after <- names(data)[after]
    } else {
      insight::format_error("No valid position defined in `after`.")
    }
  }

  cols <- .select_nse(
    select,
    data,
    exclude = NULL,
    ignore_case = ignore_case,
    regex = regex,
    verbose = verbose
  )

  # save attributes
  attr_data <- attributes(data)

  # Move columns to the right hand side
  data <- data[c(setdiff(names(data), cols), cols)]

  # Get columns and their original position
  data_cols <- names(data)
  position <- which(data_cols %in% cols)

  # remember original values, for more informative messages
  original_before <- before
  original_after <- after

  # Find new positions
  # nolint start
  if (!is.null(before)) {
    before <- before[before %in% data_cols][1] # Take first that exists (if vector is supplied)
    if (length(before) != 1 || is.na(before)) {
      # guess the misspelled column
      insight::format_error(
        "The column passed to `before` wasn't found.",
        .misspelled_string(
          data_cols,
          original_before[1],
          default_message = "Possibly misspelled?"
        )
      )
    }
    where <- min(match(before, data_cols))
    position <- c(setdiff(position, where), where)
  } else if (!is.null(after)) {
    after <- after[after %in% data_cols][1] # Take first that exists (if vector is supplied)
    if (length(after) != 1 || is.na(after)) {
      # guess the misspelled column
      insight::format_error(
        "The column passed to `after` wasn't found.",
        .misspelled_string(
          data_cols,
          original_after[1],
          default_message = "Possibly misspelled?"
        )
      )
    }
    where <- max(match(after, data_cols))
    position <- c(where, setdiff(position, where))
  } else {
    where <- 1
    position <- union(position, where)
  }
  # nolint end

  # Set left and right side
  # FIX: `seq(1, where - 1)` yields `c(1, 0)` when `where == 1`, and
  # `seq(where + 1, ncol(data))` counts *down* when `where == ncol(data)`,
  # producing index 0 and out-of-range indices that only worked by accident
  # (R silently drops a 0 index). Use safe sequences instead.
  lhs <- setdiff(seq_len(where - 1), position)
  if (where < ncol(data)) {
    rhs <- setdiff(seq(where + 1, ncol(data)), position)
  } else {
    rhs <- integer(0)
  }
  position <- unique(c(lhs, position, rhs))
  position <- position[position <= length(data_cols)]

  out <- data[position]
  out <- .replace_attrs(out, attr_data)

  out
}


#' @rdname data_relocate
#' @export
data_reorder <- function(
  data,
  select,
  exclude = NULL,
  ignore_case = FALSE,
  regex = FALSE,
  verbose = TRUE,
  ...
) {
  cols <- .select_nse(
    select,
    data,
    # FIX: was hard-coded `exclude = NULL`, silently ignoring the
    # user-supplied `exclude` argument
    exclude = exclude,
    ignore_case = ignore_case,
    regex = regex,
    verbose = verbose
  )
  remaining_columns <- setdiff(colnames(data), cols)

  out <- data[c(cols, remaining_columns)]
  out <- .replace_attrs(out, attributes(data))
  out
}
  },
  {
    "path": "R/data_remove.R",
    #' @inheritParams extract_column_names
#' @rdname data_relocate
#' @examples
#' # Remove columns
#' head(data_remove(iris, "Sepal.Length"))
#' head(data_remove(iris, starts_with("Sepal")))
#' @export
data_remove <- function(
  data,
  select = NULL,
  exclude = NULL,
  ignore_case = FALSE,
  regex = FALSE,
  verbose = FALSE,
  ...
) {
  ## TODO set verbose = TRUE by default in a later update?

  # evaluate arguments
  select <- .select_nse(
    select,
    data,
    # FIX: was hard-coded `exclude = NULL`, silently ignoring the
    # user-supplied `exclude` argument
    exclude = exclude,
    ignore_case = ignore_case,
    regex = regex,
    verbose = verbose
  )

  # nothing to remove?
  if (!length(select)) {
    return(data)
  }

  # keep every column that was not selected for removal
  out <- data[!colnames(data) %in% select]
  out <- .replace_attrs(out, attributes(data))

  out
}
  },
  {
    "path": "R/data_rename.R",
    "content": "#' @title Rename columns and variable names\n#' @name data_rename\n#'\n#' @description Safe and intuitive functions to rename variables or rows in\n#'   data frames. `data_rename()` will rename column names, i.e. it facilitates\n#'   renaming variables. `data_rename_rows()` is a convenient shortcut\n#'   to add or rename row names of a data frame, but unlike `row.names()`, its\n#'   input and output is a data frame, thus, integrating smoothly into a\n#'   possible pipe-workflow.\n#'\n#' @inheritParams extract_column_names\n#' @param data A data frame.\n#' @param replacement Character vector. Can be one of the following:\n#'   - A character vector that indicates the new names of the columns selected\n#'     in `select`. `select` and `replacement` must be of the same length.\n#'   - A string (i.e. character vector of length 1) with a \"glue\" styled\n#'     pattern. Currently supported tokens are:\n#'     - `{col}` which will be replaced by the column name, i.e. the\n#'       corresponding value in `select`.\n#'     - `{n}` will be replaced by the number of the variable that is replaced.\n#'     - `{letter}` will be replaced by alphabetical letters in sequential\n#'       order.\n#'       If more than 26 letters are required, letters are repeated, but have\n#'       sequential numeric indices (e.g., `a1` to `z1`, followed by `a2` to\n#'       `z2`).\n#'     - Finally, the name of a user-defined object that is available in the\n#'       environment can be used. Note that the object's name is not allowed to\n#'       be one of the pre-defined tokens, `\"col\"`, `\"n\"` and `\"letter\"`.\n#'\n#'     An example for the use of tokens is...\n#'     ```r\n#'     data_rename(\n#'       mtcars,\n#'       select = c(\"am\", \"vs\"),\n#'       replacement = \"new_name_from_{col}\"\n#'     )\n#'     ```\n#'     ... which would return new column names `new_name_from_am` and\n#'     `new_name_from_vs`. 
See 'Examples'.\n#'\n#' If `select` is a named vector, `replacement` is ignored.\n#' @param rows Vector of row names.\n#' @param ... Other arguments passed to or from other functions.\n#'\n#' @details\n#' `select` can also be a named character vector. In this case, the names are\n#' used to rename the columns in the output data frame. If you have a named\n#' list, use `unlist()` to convert it to a named vector. See 'Examples'.\n#'\n#' @return A modified data frame.\n#'\n#' @examples\n#' # Rename columns\n#' head(data_rename(iris, \"Sepal.Length\", \"length\"))\n#'\n#' # Use named vector to rename\n#' head(data_rename(iris, c(length = \"Sepal.Length\", width = \"Sepal.Width\")))\n#'\n#' # Change all\n#' head(data_rename(iris, replacement = paste0(\"Var\", 1:5)))\n#'\n#' # Use glue-styled patterns\n#' head(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"formerly_{col}\"))\n#' head(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"{col}_is_column_{n}\"))\n#' head(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"new_{letter}\"))\n#'\n#' # User-defined glue-styled patterns from objects in environment\n#' x <- c(\"hi\", \"there\", \"!\")\n#' head(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"col_{x}\"))\n#' @seealso\n#' - Add a prefix or suffix to column names: [data_addprefix()], [data_addsuffix()]\n#' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()],\n#'   [data_remove()]\n#' - Functions to reshape, pivot or rotate data frames: [data_to_long()],\n#'   [data_to_wide()], [data_rotate()]\n#' - Functions to recode data: [rescale()], [reverse()], [categorize()],\n#'   [recode_values()], [slide()]\n#' - Functions to standardize, normalize, rank-transform: [center()], [standardize()],\n#'   [normalize()], [ranktransform()], [winsorize()]\n#' - Split and merge data frames: [data_partition()], [data_merge()]\n#' - Functions to find or select columns: [data_select()], [extract_column_names()]\n#' - Functions to 
filter rows: [data_match()], [data_filter()]\n#'\n#' @export\ndata_rename <- function(data, select = NULL, replacement = NULL, ...) {\n  # check for valid input\n  if (!is.data.frame(data)) {\n    insight::format_error(\"Argument `data` must be a data frame.\")\n  }\n\n  # change all names if no pattern specified\n  select <- .select_nse(\n    select,\n    data,\n    exclude = NULL,\n    ignore_case = NULL,\n    regex = NULL,\n    allow_rename = TRUE,\n    ifnotfound = \"error\"\n  )\n\n  # Forbid partially named \"select\",\n  # Ex: if select = c(\"foo\" = \"Species\", \"Sepal.Length\") then the 2nd name and\n  # 2nd value are \"Sepal.Length\"\n  if (!is.null(names(select)) && any(names(select) == select)) {\n    insight::format_error(\n      \"When `select` is a named vector, all elements must be named.\"\n    )\n  }\n\n  # check if `select` has names, and if so, use as \"replacement\"\n  if (!is.null(names(select))) {\n    replacement <- names(select)\n  }\n\n  # coerce to character\n  replacement <- as.character(replacement)\n\n  # check if `replacement` has no empty strings and no NA values\n  invalid_replacement <- is.na(replacement) | !nzchar(replacement)\n  if (any(invalid_replacement)) {\n    if (is.null(names(select))) {\n      # when user did not match `select` with `replacement`\n      msg <- c(\n        \"`replacement` is not allowed to have `NA` or empty strings.\",\n        sprintf(\n          \"Following values in `select` have no match in `replacement`: %s\",\n          toString(select[invalid_replacement])\n        )\n      )\n    } else {\n      # when user did not name all elements of `select`\n      msg <- c(\n        \"Either name all elements of `select` or use `replacement`.\",\n        sprintf(\n          \"Following values in `select` were not named: %s\",\n          toString(select[invalid_replacement])\n        )\n      )\n    }\n    insight::format_error(msg)\n  }\n\n  # if duplicated names in replacement, append \".2\", \".3\", etc. 
to duplicates\n  # ex: c(\"foo\", \"foo\") -> c(\"foo\", \"foo.2\")\n  if (anyDuplicated(replacement) > 0L) {\n    dup <- as.data.frame(table(replacement))\n    dup <- dup[dup$Freq > 1, ]\n    for (i in dup$replacement) {\n      to_replace <- which(replacement == i)[-1]\n      new_replacement <- paste0(i, \".\", 1 + seq_along(to_replace))\n      replacement[to_replace] <- new_replacement\n    }\n  }\n\n  # check if we have \"glue\" styled replacement-string\n  glue_style <- length(replacement) == 1 &&\n    grepl(\"{\", replacement, fixed = TRUE)\n\n  if (length(replacement) > length(select)) {\n    insight::format_error(\n      \"There are more names in `replacement` than in `select`.\"\n    )\n  } else if (length(replacement) < length(select) && !glue_style) {\n    insight::format_error(\n      \"There are more names in `select` than in `replacement`\"\n    )\n  }\n\n  # if we have glue-styled replacement-string, create replacement select now\n  if (glue_style) {\n    replacement <- .glue_replacement(select, replacement)\n  }\n\n  for (i in seq_along(select)) {\n    if (!is.na(replacement[i])) {\n      data <- .data_rename(data, select[i], replacement[i])\n    }\n  }\n\n  data\n}\n\n#' @keywords internal\n.data_rename <- function(data, pattern, replacement) {\n  if (!pattern %in% names(data)) {\n    insight::format_error(paste0(\n      \"Variable `\",\n      pattern,\n      \"` is not in your data frame :/\"\n    ))\n  }\n\n  names(data) <- replace(names(data), names(data) == pattern, replacement)\n\n  data\n}\n\n\n.glue_replacement <- function(pattern, replacement) {\n  # this function replaces \"glue\" tokens into their related\n  # real names/values. 
Currently, following tokens are accepted:\n  # - {col}: replacement is the name of the column (indicated in \"pattern\")\n  # - {letter}: replacement is lower-case alphabetically letter, in sequential order\n  # - {n}: replacement is the number of the variable out of n, that should be renamed\n  out <- rep_len(\"\", length(pattern))\n\n  # for alphabetical letters, we prepare a string if we have more than\n  # 26 columns to rename\n  if (length(out) > 26) {\n    long_letters <- paste0(\n      rep.int(letters[1:26], times = ceiling(length(out) / 26)),\n      rep(1:ceiling(length(out) / 26), each = 26)\n    )\n  } else {\n    long_letters <- letters[1:26]\n  }\n  long_letters <- long_letters[seq_along(out)]\n\n  for (i in seq_along(out)) {\n    # prepare pattern\n    column_name <- pattern[i]\n    out[i] <- replacement\n    # replace first pre-defined token\n    out[i] <- gsub(\n      \"(.*)(\\\\{col\\\\})(.*)\",\n      replacement = paste0(\"\\\\1\", column_name, \"\\\\3\"),\n      x = out[i]\n    )\n    # replace second pre-defined token\n    out[i] <- gsub(\n      \"(.*)(\\\\{n\\\\})(.*)\",\n      replacement = paste0(\"\\\\1\", i, \"\\\\3\"),\n      x = out[i]\n    )\n    # replace third pre-defined token\n    out[i] <- gsub(\n      \"(.*)(\\\\{letter\\\\})(.*)\",\n      replacement = paste0(\"\\\\1\", long_letters[i], \"\\\\3\"),\n      x = out[i]\n    )\n    # extract all non-standard tokens\n    matches <- unlist(\n      regmatches(out[i], gregexpr(\"\\\\{([^}]*)\\\\}\", out[i])),\n      use.names = FALSE\n    )\n    # do we have any additional tokens, i.e. 
variable names from the environment?\n    # users can also specify variable names, where the\n    if (length(matches)) {\n      # if so, iterate all tokens\n      for (token in matches) {\n        # evaluate token-object from the environment\n        values <- .dynEval(\n          str2lang(gsub(\"\\\\{(.*)\\\\}\", \"\\\\1\", token)),\n          ifnotfound = insight::format_error(paste0(\n            \"The object `\",\n            token,\n            \"` was not found. Please check if it really exists.\"\n          ))\n        )\n        # check for correct length\n        if (length(values) != length(pattern)) {\n          insight::format_error(paste0(\n            \"The number of values provided in `\",\n            token,\n            \"` (\",\n            length(values),\n            \" values) do not match the number of columns to rename (\",\n            length(pattern),\n            \" columns).\"\n          ))\n        }\n        # replace token with values from the object\n        if (length(values)) {\n          out[i] <- gsub(token, values[i], out[i], fixed = TRUE)\n        }\n      }\n    }\n  }\n  out\n}\n\n\n# Row.names ----------------------------------------------------------------\n\n#' @rdname data_rename\n#' @export\ndata_rename_rows <- function(data, rows = NULL) {\n  row.names(data) <- rows\n  data\n}\n"
  },
  {
    "path": "R/data_replicate.R",
    "content": "#' @title Expand (i.e. replicate rows) a data frame\n#' @name data_replicate\n#'\n#' @description\n#' Expand a data frame by replicating rows based on another variable that\n#' contains the counts of replications per row.\n#'\n#' @param data A data frame.\n#' @param expand The name of the column that contains the counts of replications\n#' for each row. Can also be a numeric value, indicating the position of that\n#' column. Note that the variable indicated by `expand` must be an integer vector.\n#' @param remove_na Logical. If `TRUE`, missing values in the column\n#' provided in `expand` are removed from the data frame. If `FALSE` and `expand`\n#' contains missing values, the function will throw an error.\n#' @param ... Currently not used.\n#' @inheritParams extract_column_names\n#'\n#' @return A dataframe with each row replicated as many times as defined in `expand`.\n#'\n#' @examples\n#' data(mtcars)\n#' data_replicate(head(mtcars), \"carb\")\n#' @export\ndata_replicate <- function(\n  data,\n  expand = NULL,\n  select = NULL,\n  exclude = NULL,\n  remove_na = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n) {\n  # we need a name for the new column\n  if (is.null(expand)) {\n    insight::format_error(\n      \"No column that should be used to expand the data frame was provided. 
Please use `expand` to define a column.\"\n    )\n  }\n\n  # only one column name\n  if (length(expand) > 1) {\n    insight::format_error(\n      \"Please provide only a single string for `expand`, no character vector with multiple values.\"\n    )\n  }\n\n  # check if numerics, and if so, use column name\n  if (is.numeric(expand)) {\n    expand <- colnames(data)[expand]\n  }\n\n  # check if in data\n  if (!expand %in% colnames(data)) {\n    insight::format_error(\n      \"The column provided in `expand` does not exist in the data frame.\",\n      .misspelled_string(colnames(data), expand, \"Possibly misspelled?\")\n    )\n  }\n\n  # check that \"expand\" contains no Inf\n  if (any(is.infinite(data[[expand]]))) {\n    insight::format_error(\n      \"The column provided in `expand` contains infinite values. Please provide a column that does not contain infinite values.\" # nolint\n    )\n  }\n\n  # check that \"expand\" is integer\n  if (!.is_integer(data[[expand]])) {\n    insight::format_error(\n      \"The column provided in `expand` is not of type integer. 
Please provide a column that contains integer values.\" # nolint\n    )\n  }\n\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # extract variable that contains the counts of replicates\n  replicates <- data[[expand]]\n  # we can remove that column now\n  data[[expand]] <- NULL\n\n  # also remove \"expand\" from \"select\" string\n  select <- setdiff(select, expand)\n\n  # if user doesn't want to remove \"NA\", but replicates contain \"NA\",\n  # give informative error here\n  if (!remove_na && anyNA(replicates)) {\n    insight::format_error(\n      \"The column provided in `expand` contains missing values, but `remove_na` is set to `FALSE`.\",\n      \"Please set `remove_na` to `TRUE` or remove the missing values from the `expand` variable.\"\n    )\n  }\n\n  # remove rows where \"expand\" is NA\n  data <- data[!is.na(replicates), , drop = FALSE]\n  replicates <- replicates[!is.na(replicates)]\n\n  # fin\n  as.data.frame(do.call(\n    cbind,\n    lapply(data[select], rep.int, times = replicates)\n  ))\n}\n\n\n# is.integer(c(1, 2)) -> FALSE\n# all(c(1, 2) %% 1 == 0) -> TRUE\n.is_integer <- function(x, remove_na = TRUE) {\n  if (remove_na) {\n    x <- x[!is.na(x)]\n  }\n  tryCatch(\n    all(x %% 1 == 0),\n    warning = function(w) is.integer(x),\n    error = function(e) FALSE\n  )\n}\n"
  },
  {
    "path": "R/data_rescale.R",
    "content": "#' @title Rescale Variables to a New Range\n#' @name rescale\n#'\n#' @description\n#' Rescale variables to a new range. Can also be used to reverse-score variables\n#' (change the keying/scoring direction), or to expand a range.\n#'\n#' @inheritParams categorize\n#' @inheritParams extract_column_names\n#' @inheritParams standardize.data.frame\n#' @param to Numeric vector of length 2 giving the new range that the variable\n#'   will have after rescaling. To reverse-score a variable, the range should\n#'   be given with the maximum value first. See examples.\n#' @param multiply If not `NULL`, `to` is ignored and `multiply` will be used,\n#'   giving the factor by which the actual range of `x` should be expanded.\n#'   For example, if a vector ranges from 5 to 15 and `multiply = 1.1`, the current\n#'   range of 10 will be expanded by the factor of 1.1, giving a new range of\n#'   11. Thus, the rescaled vector would range from 4.5 to 15.5.\n#' @param add A vector of length 1 or 2. If not `NULL`, `to` is ignored and `add`\n#'   will be used, giving the amount by which the minimum and maximum of the\n#'   actual range of `x` should be expanded. For example, if a vector ranges from\n#'   5 to 15 and `add = 1`, the range will be expanded from 4 to 16. If `add` is\n#'   of length 2, then the first value is used for the lower bound and the second\n#'   value for the upper bound.\n#' @param range Initial (old) range of values. If `NULL`, will take the range of\n#'   the input vector (`range(x)`).\n#' @param ... 
Arguments passed to or from other methods.\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @seealso See [makepredictcall.dw_transformer()] for use in model formulas.\n#' @family transform utilities\n#'\n#' @return A rescaled object.\n#'\n#' @examples\n#' rescale(c(0, 1, 5, -5, -2))\n#' rescale(c(0, 1, 5, -5, -2), to = c(-5, 5))\n#' rescale(c(1, 2, 3, 4, 5), to = c(-2, 2))\n#'\n#' # Specify the \"theoretical\" range of the input vector\n#' rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4))\n#'\n#' # Reverse-score a variable\n#' rescale(c(1, 2, 3, 4, 5), to = c(5, 1))\n#' rescale(c(1, 2, 3, 4, 5), to = c(2, -2))\n#'\n#' # Data frames\n#' head(rescale(iris, to = c(0, 1)))\n#' head(rescale(iris, to = c(0, 1), select = \"Sepal.Length\"))\n#'\n#' # One can specify a list of ranges\n#' head(rescale(iris, to = list(\n#'   \"Sepal.Length\" = c(0, 1),\n#'   \"Petal.Length\" = c(-1, 0)\n#' )))\n#'\n#' # \"expand\" ranges by a factor or a given value\n#' x <- 5:15\n#' x\n#' # both will expand the range by 10%\n#' rescale(x, multiply = 1.1)\n#' rescale(x, add = 0.5)\n#'\n#' # expand range by different values\n#' rescale(x, add = c(1, 3))\n#'\n#' # Specify list of multipliers\n#' d <- data.frame(x = 5:15, y = 5:15)\n#' rescale(d, multiply = list(x = 1.1, y = 0.5))\n#' @export\nrescale <- function(x, ...) {\n  UseMethod(\"rescale\")\n}\n\n\n#' @rdname rescale\n#' @export\nchange_scale <- function(x, ...) {\n  # Alias for rescale()\n  rescale(x, ...)\n}\n\n\n#' @export\nrescale.default <- function(x, verbose = TRUE, ...) 
{\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      paste0(\n        \"Variables of class `\",\n        class(x)[1],\n        \"` can't be rescaled and remain unchanged.\"\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname rescale\n#' @export\nrescale.numeric <- function(\n  x,\n  to = c(0, 100),\n  multiply = NULL,\n  add = NULL,\n  range = NULL,\n  verbose = TRUE,\n  ...\n) {\n  if (is.null(to)) {\n    return(x)\n  }\n\n  # Warning if all NaNs\n  if (all(is.na(x))) {\n    return(x)\n  }\n\n  if (is.null(range)) {\n    range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))\n  }\n\n  # check if user specified \"multiply\" or \"add\", and then update \"to\"\n  to <- .update_to(x, to, multiply, add)\n\n  # called from \"makepredictcal()\"? Then we have additional arguments\n  dot_args <- list(...)\n  required_dot_args <- c(\"min_value\", \"max_value\", \"new_min\", \"new_max\")\n  flag_predict <- FALSE\n\n  if (all(required_dot_args %in% names(dot_args))) {\n    # we gather informatiom about the original data, which is needed\n    # for \"predict()\" to work properly when \"rescale()\" is called\n    # in formulas on-the-fly, e.g. 
\"lm(mpg ~ rescale(hp), data = mtcars)\"\n    min_value <- dot_args$min_value\n    max_value <- dot_args$max_value\n    new_min <- dot_args$new_min\n    new_max <- dot_args$new_max\n    flag_predict <- TRUE\n  } else {\n    min_value <- ifelse(is.na(range[1]), min(x, na.rm = TRUE), range[1])\n    max_value <- ifelse(is.na(range[2]), max(x, na.rm = TRUE), range[2])\n    new_min <- ifelse(is.na(to[1]), min_value, to[1])\n    new_max <- ifelse(is.na(to[2]), max_value, to[2])\n  }\n\n  # Warning if only one value\n  if (!flag_predict && insight::has_single_value(x) && is.null(range)) {\n    if (verbose) {\n      insight::format_warning(\n        \"A `range` must be provided for data with only one unique value.\"\n      )\n    }\n    return(x)\n  }\n\n  out <- as.vector(\n    (new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min\n  )\n\n  attr(out, \"min_value\") <- min_value\n  attr(out, \"max_value\") <- max_value\n  attr(out, \"new_min\") <- new_min\n  attr(out, \"new_max\") <- new_max\n  attr(out, \"range_difference\") <- max_value - min_value\n  attr(out, \"to_range\") <- c(new_min, new_max)\n  # don't add attribute when we call data frame methods\n  if (!isFALSE(dot_args$add_transform_class)) {\n    class(out) <- c(\"dw_transformer\", class(out))\n  }\n\n  out\n}\n\n\n#' @export\nrescale.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  to = c(0, 100),\n  multiply = NULL,\n  add = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n) {\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = TRUE,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if 
(!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\",\n      preserve_value_labels = TRUE\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x <- as.data.frame(x)\n  for (rows in grps) {\n    x[rows, ] <- rescale(\n      x[rows, , drop = FALSE],\n      select = select,\n      exclude = exclude,\n      to = to,\n      multiply = multiply,\n      add = add,\n      range = range,\n      append = FALSE, # need to set to FALSE here, else variable will be doubled\n      add_transform_class = FALSE,\n      ...\n    )\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- utils::modifyList(info, attributes(x))\n  x\n}\n\n\n#' @rdname rescale\n#' @export\nrescale.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  to = c(0, 100),\n  multiply = NULL,\n  add = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\"\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  # Transform the range so that it is a list now\n  if (!is.null(range) && !is.list(range)) {\n    range <- stats::setNames(rep(list(range), length(select)), select)\n  }\n  # Transform the 'to' so that it is a list now\n  if (!is.list(to)) {\n    to <- stats::setNames(rep(list(to), length(select)), select)\n  }\n  # Transform the 'multiply' so that it is a list 
now\n  if (!is.null(multiply) && !is.list(multiply)) {\n    multiply <- stats::setNames(rep(list(multiply), length(select)), select)\n  }\n  # Transform the 'add' so that it is a list now\n  if (!is.null(add) && !is.list(add)) {\n    add <- stats::setNames(rep(list(add), length(select)), select)\n  }\n  # update \"to\" if user specified \"multiply\" or \"add\"\n  to[] <- lapply(names(to), function(i) {\n    .update_to(x[[i]], to[[i]], multiply[[i]], add[[i]])\n  })\n\n  x[select] <- as.data.frame(sapply(\n    select,\n    function(n) {\n      rescale(\n        x[[n]],\n        to = to[[n]],\n        range = range[[n]],\n        add_transform_class = FALSE\n      )\n    },\n    simplify = FALSE\n  ))\n  x\n}\n\n\n# helper ----------------------------------------------------------------------\n\n# expand the new target range by multiplying or adding\n.update_to <- function(x, to, multiply, add) {\n  # check if user specified \"multiply\" or \"add\", and if not, return \"to\"\n  if (is.null(multiply) && is.null(add)) {\n    return(to)\n  }\n  # only one of \"multiply\" or \"add\" can be specified\n  if (!is.null(multiply) && !is.null(add)) {\n    insight::format_error(\"Only one of `multiply` or `add` can be specified.\")\n  }\n  # multiply? 
If yes, calculate the \"add\" value\n  if (!is.null(multiply)) {\n    # check for correct length\n    if (length(multiply) > 1) {\n      insight::format_error(\"The length of `multiply` must be 1.\")\n    }\n    add <- (diff(range(x, na.rm = TRUE)) * (multiply - 1)) / 2\n  }\n  # add?\n  if (!is.null(add)) {\n    # add must be of length 1 or 2\n    if (length(add) > 2) {\n      insight::format_error(\"The length of `add` must be 1 or 2.\")\n    }\n    # if add is of length 2, then the first value is used for the lower bound\n    # and the second value for the upper bound\n    if (length(add) == 2) {\n      add_low <- add[1]\n      add_high <- add[2]\n    } else {\n      add_low <- add_high <- add\n    }\n    to <- c(min(x, na.rm = TRUE) - add_low, max(x, na.rm = TRUE) + add_high)\n  }\n  to\n}\n"
  },
  {
    "path": "R/data_restoretype.R",
    "content": "#' Restore the type of columns according to a reference data frame\n#'\n#' @param data A data frame for which to restore the column types.\n#' @inheritParams data_to_long\n#' @inheritParams data_rename\n#' @param reference A reference data frame from which to find the correct\n#'   column types. If `NULL`, each column is converted to numeric if it doesn't\n#'   generate `NA`s. For example, `c(\"1\", \"2\")` can be converted to numeric but not\n#'   `c(\"Sepal.Length\")`.\n#' @return\n#'\n#' A data frame with columns whose types have been restored based on the\n#' reference data frame.\n#'\n#' @examples\n#' data <- data.frame(\n#'   Sepal.Length = c(\"1\", \"3\", \"2\"),\n#'   Species = c(\"setosa\", \"versicolor\", \"setosa\"),\n#'   New = c(\"1\", \"3\", \"4\")\n#' )\n#'\n#' fixed <- data_restoretype(data, reference = iris)\n#' summary(fixed)\n#' @export\n\ndata_restoretype <- function(data, reference = NULL, ...) {\n  for (col in names(data)) {\n    # No reference data (regular fixing) ----------------\n    if (is.null(reference)) {\n      if (is.character(data[[col]])) {\n        data[[col]] <- coerce_to_numeric(data[[col]])\n      }\n    } else {\n      if (is.factor(reference[[col]]) && !is.factor(data[[col]])) {\n        # Restore factor levels\n        data[[col]] <- factor(data[[col]], levels = levels(reference[[col]]))\n      }\n\n      if (is.numeric(reference[[col]]) && !is.numeric(data[[col]])) {\n        data[[col]] <- coerce_to_numeric(as.character(data[[col]]))\n      }\n\n      if (is.character(reference[[col]]) && !is.character(data[[col]])) {\n        data[[col]] <- as.character(data[[col]])\n      }\n    }\n  }\n\n  data\n}\n"
  },
  {
    "path": "R/data_reverse.R",
    "content": "#' Reverse-Score Variables\n#'\n#' Reverse-score variables (change the keying/scoring direction).\n#'\n#' @param range Range of values that is used as reference for reversing the\n#'   scale. For numeric variables, can be `NULL` or a numeric vector of length\n#'   two, indicating the lowest and highest value of the reference range. If\n#'   `NULL`, will take the range of the input vector (`range(x)`). For factors,\n#'   `range` can be `NULL`, a numeric vector of length two, or a (numeric)\n#'   vector of at least the same length as factor levels (i.e. must be equal\n#'   to or larger than `nlevels(x)`). Note that providing a `range` for factors\n#'   usually only makes sense when factor levels are numeric, not characters.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams categorize\n#' @inheritParams extract_column_names\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @examples\n#' reverse(c(1, 2, 3, 4, 5))\n#' reverse(c(-2, -1, 0, 2, 1))\n#'\n#' # Specify the \"theoretical\" range of the input vector\n#' reverse(c(1, 3, 4), range = c(0, 4))\n#'\n#' # Factor variables\n#' reverse(factor(c(1, 2, 3, 4, 5)))\n#' reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10)\n#'\n#' # Data frames\n#' head(reverse(iris))\n#' head(reverse(iris, select = \"Sepal.Length\"))\n#'\n#' @return A reverse-scored object.\n#'\n#' @family transform utilities\n#'\n#' @inherit data_rename seealso\n#'\n#' @export\nreverse <- function(x, ...) {\n  UseMethod(\"reverse\")\n}\n\n\n#' @rdname reverse\n#' @export\nreverse_scale <- reverse\n\n\n#' @export\nreverse.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      paste0(\n        \"Variables of class '\",\n        class(x)[1],\n        \"' can't be recoded and remain unchanged.\"\n      )\n    )\n  }\n\n  x\n}\n\n\n#' @rdname reverse\n#' @export\nreverse.numeric <- function(x, range = NULL, verbose = TRUE, ...) 
{\n  # Warning if all NaNs\n  if (all(is.na(x))) {\n    return(x)\n  }\n\n  # Warning if only one value\n  if (insight::has_single_value(x) && is.null(range)) {\n    if (verbose) {\n      insight::format_warning(\n        \"A `range` must be provided for data with only one unique value.\"\n      )\n    }\n    return(x)\n  }\n\n  # no missing values allowed\n  if (anyNA(range)) {\n    insight::format_error(\"`range` is not allowed to have missing values.\")\n  }\n\n  if (is.null(range)) {\n    range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))\n  }\n\n  # old minimum and maximum\n  min_value <- min(range)\n  max_value <- max(range)\n\n  # check if a valid range (i.e. vector of length 2) is provided\n  if (length(range) > 2) {\n    insight::format_error(\n      \"`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.\",\n      sprintf(\n        \"Did you want to provide `range = c(%g, %g)`?\",\n        min_value,\n        max_value\n      )\n    )\n  }\n\n  new_min <- max_value\n  new_max <- min_value\n\n  out <- as.vector(\n    (new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min\n  )\n\n  # labelled data?\n  out <- .set_back_labels(out, x, reverse_values = TRUE)\n  out\n}\n\n\n#' @export\nreverse.factor <- function(x, range = NULL, verbose = TRUE, ...) 
{\n  # Warning if all NaNs\n  if (all(is.na(x))) {\n    return(x)\n  }\n\n  # Warning if only one value\n  if (insight::has_single_value(x) && is.null(range)) {\n    if (verbose) {\n      insight::format_warning(\n        \"A `range` must be provided for data with only one unique value.\"\n      )\n    }\n    return(x)\n  }\n\n  # save for later use\n  original_x <- x\n\n  if (is.null(range)) {\n    old_levels <- levels(x)\n  } else {\n    # no missing values allowed\n    if (anyNA(range)) {\n      insight::format_error(\"`range` is not allowed to have missing values.\")\n    }\n    range_ok <- TRUE\n    # if we have a vector of length 2 for range, and more factor levels,\n    # we assume `range` indicates minimum and maximum of range values\n    if (length(range) == 2 && nlevels(droplevels(x)) > 2) {\n      if (is.numeric(range)) {\n        range <- min(range):max(range)\n      } else {\n        # if range is of length 2, and we have more than 2 number of levels,\n        # range must be numeric to indicate minima and maxima. 
if not, stop.\n        range_ok <- FALSE\n      }\n    }\n    if (length(range) > 2 && length(range) < nlevels(droplevels(x))) {\n      # if range has more than two values, but fewer values than number of\n      # factor levels, we cannot associate the reversed scale, so stop\n      range_ok <- FALSE\n    }\n    if (!range_ok) {\n      insight::format_error(\n        \"`range` must be one of the following:\",\n        \"- a numeric vector of length two, indicating lowest and highest value of the required range,\",\n        \"- a vector (numeric or character) of values with at least as many values as number of levels in `x`,\",\n        \"- or `NULL`.\"\n      )\n    }\n    # check if no or not all old levels are in new range\n    if (verbose) {\n      if (!any(levels(x) %in% as.character(range))) {\n        insight::format_warning(\n          \"No current factor level is included in `range`.\",\n          \"Returned factor will only contain missing values.\"\n        )\n      } else if (!all(levels(x) %in% as.character(range))) {\n        insight::format_warning(\n          \"Not all current factor levels are included in `range`.\",\n          \"Returned factor will contain missing values.\"\n        )\n      }\n    }\n    old_levels <- range\n    x <- factor(x, levels = range)\n  }\n\n  int_x <- as.integer(x)\n  rev_x <- reverse(int_x, range = c(1, length(old_levels)))\n  x <- factor(rev_x, levels = seq_along(old_levels), labels = old_levels)\n\n  # labelled data?\n  x <- .set_back_labels(x, original_x, reverse_values = TRUE)\n\n  x\n}\n\n\n#' @export\nreverse.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n) {\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = 
TRUE,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    arguments <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\",\n      preserve_value_labels = TRUE\n    )\n    # update processed arguments\n    x <- arguments$x\n    select <- arguments$select\n  }\n\n  x <- as.data.frame(x)\n  for (rows in grps) {\n    x[rows, ] <- reverse(\n      x[rows, , drop = FALSE],\n      select = select,\n      exclude = exclude,\n      range = range,\n      append = FALSE, # need to set to FALSE here, else variable will be doubled\n      ...\n    )\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- utils::modifyList(info, attributes(x))\n  x\n}\n\n\n#' @rdname reverse\n#' @export\nreverse.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    arguments <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\",\n      preserve_value_labels = TRUE\n    )\n    # update processed arguments\n    x <- arguments$x\n    select <- arguments$select\n  }\n\n  # Transform the range so that it is a list now\n  if (!is.null(range) && !is.list(range)) {\n    range <- stats::setNames(rep(list(range), length(select)), select)\n  }\n\n  x[select] <- lapply(select, function(n) {\n    reverse(x[[n]], range = range[[n]])\n  })\n  
x\n}\n"
  },
  {
    "path": "R/data_rotate.R",
    "content": "#' @title Rotate a data frame\n#' @name data_rotate\n#'\n#' @description\n#' This function rotates a data frame, i.e. columns become rows and vice versa.\n#'   It's the equivalent of using `t()` but restores the `data.frame` class,\n#'   preserves attributes and prints a warning if the data type is\n#'   modified (see example).\n#'\n#' @param data A data frame.\n#' @param rownames Character vector (optional). If not `NULL`, the data frame's\n#'   rownames will be added as (first) column to the output, with `rownames`\n#'   being the name of this column.\n#' @param colnames Logical or character vector (optional). If `TRUE`, the values\n#'   of the first column in `x` will be used as column names in the rotated data\n#'   frame. If a character vector, values from that column are used as column\n#'   names.\n#' @param verbose Toggle warnings.\n#'\n#' @inherit data_rename seealso\n#'\n#' @return A (rotated) data frame.\n#'\n#' @examples\n#' x <- mtcars[1:3, 1:4]\n#'\n#' x\n#'\n#' data_rotate(x)\n#' data_rotate(x, rownames = \"property\")\n#'\n#' # use values in 1. 
column as column name\n#' data_rotate(x, colnames = TRUE)\n#' data_rotate(x, rownames = \"property\", colnames = TRUE)\n#'\n#' # use either first column or specific column for column names\n#' x <- data.frame(a = 1:5, b = 11:15, c = 21:25)\n#' data_rotate(x, colnames = TRUE)\n#' data_rotate(x, colnames = \"c\")\n#'\n#' @export\ndata_rotate <- function(\n  data,\n  rownames = NULL,\n  colnames = FALSE,\n  verbose = TRUE\n) {\n  # copy attributes\n  attr_data <- attributes(data)\n\n  # check if first column has column names to be used for rotated data\n  if (isTRUE(colnames)) {\n    colnames <- data[[1]]\n    data <- data[-1]\n  } else if (\n    !is.null(colnames) && is.character(colnames) && colnames %in% colnames(data)\n  ) {\n    cn_col <- which(colnames(data) == colnames)\n    colnames <- data[[colnames]]\n    data <- data[-cn_col]\n  } else {\n    colnames <- row.names(data)\n  }\n\n  # warning after possible removal of columns\n  if (\n    verbose &&\n      insight::n_unique(vapply(data, typeof, FUN.VALUE = character(1L))) > 1L\n  ) {\n    insight::format_warning(\n      \"Your data frame contains mixed types of data. After transposition, all variables will be transformed into characters.\"\n    ) # nolint\n  }\n\n  # rotate data frame by 90 degrees\n  out <- as.data.frame(t(as.data.frame(data)))\n\n  # add column names, if requested\n  if (!is.null(colnames)) {\n    # check if we have correct length of column names\n    if (length(colnames) != ncol(out)) {\n      insight::format_warning(\n        \"Length of provided column names does not match number of columns. No column names changed.\"\n      )\n    } else {\n      colnames(out) <- colnames\n    }\n  }\n\n  # add rownames as a new column, if requested\n  if (!is.null(rownames)) {\n    out <- rownames_as_column(out, var = rownames)\n  }\n\n  out <- .replace_attrs(out, attr_data)\n\n  out\n}\n\n\n#' @rdname data_rotate\n#' @export\ndata_transpose <- data_rotate\n"
  },
  {
    "path": "R/data_seek.R",
    "content": "#' @title Find variables by their names, variable or value labels\n#' @name data_seek\n#'\n#' @description This functions seeks variables in a data frame, based on patterns\n#' that either match the variable name (column name), variable labels, value labels\n#' or factor levels. Matching variable and value labels only works for \"labelled\"\n#' data, i.e. when the variables either have a `label` attribute or `labels`\n#' attribute.\n#'\n#' `data_seek()` is particular useful for larger data frames with labelled\n#' data - finding the correct variable name can be a challenge. This function\n#' helps to find the required variables, when only certain patterns of variable\n#' names or labels are known.\n#'\n#' @param data A data frame.\n#' @param pattern Character string (regular expression) to be matched in `data`.\n#' May also be a character vector of length > 1. `pattern` is searched for in\n#' column names, variable label and value labels attributes, or factor levels of\n#' variables in `data`.\n#' @param seek Character vector, indicating where `pattern` is sought. Use one\n#' or more of the following options:\n#'\n#' - `\"names\"`: Searches in column names. `\"column_names\"` and `\"columns\"` are\n#'   aliases for `\"names\"`.\n#' - `\"labels\"`: Searches in variable labels. Only applies when a `label` attribute\n#'   is set for a variable.\n#' - `\"values\"`: Searches in value labels or factor levels. Only applies when a\n#'   `labels` attribute is set for a variable, or if a variable is a factor.\n#'   `\"levels\"` is an alias for `\"values\"`.\n#' - `\"all\"`: Searches in all of the above.\n#' @param fuzzy Logical. 
If `TRUE`, \"fuzzy matching\" (partial and close distance\n#' matching) will be used to find `pattern`.\n#'\n#' @return A data frame with three columns: the column index, the column name\n#' and - if available - the variable label of all matched variables in `data`.\n#'\n#' @examples\n#' # seek variables with \"Length\" in variable name or labels\n#' data_seek(iris, \"Length\")\n#'\n#' # seek variables with \"dependency\" in names or labels\n#' # column \"e42dep\" has a label-attribute \"elder's dependency\"\n#' data(efc)\n#' data_seek(efc, \"dependency\")\n#'\n#' # \"female\" only appears as value label attribute - default search is in\n#' # variable names and labels only, so no match\n#' data_seek(efc, \"female\")\n#' # when we seek in all sources, we find the variable \"e16sex\"\n#' data_seek(efc, \"female\", seek = \"all\")\n#'\n#' # typo, no match\n#' data_seek(iris, \"Lenght\")\n#' # typo, fuzzy match\n#' data_seek(iris, \"Lenght\", fuzzy = TRUE)\n#' @export\ndata_seek <- function(\n  data,\n  pattern,\n  seek = c(\"names\", \"labels\"),\n  fuzzy = FALSE\n) {\n  # check valid args\n  if (!is.data.frame(data)) {\n    insight::format_error(\"`data` must be a data frame.\")\n  }\n\n  # check valid args\n  seek <- intersect(\n    seek,\n    c(\"names\", \"labels\", \"values\", \"levels\", \"column_names\", \"columns\", \"all\")\n  )\n  if (is.null(seek) || !length(seek)) {\n    insight::format_error(\n      \"`seek` must be one of \\\"names\\\", \\\"labels\\\", \\\"values\\\", a combination of these options, or \\\"all\\\".\"\n    ) # nolint\n  }\n\n  pos1 <- pos2 <- pos3 <- NULL\n\n  pos <- unlist(lapply(pattern, function(search_pattern) {\n    # search in variable names?\n    if (any(seek %in% c(\"names\", \"columns\", \"column_names\", \"all\"))) {\n      pos1 <- grep(search_pattern, colnames(data))\n      # find in near distance?\n      if (fuzzy) {\n        pos1 <- c(\n          pos1,\n          .fuzzy_grep(x = colnames(data), pattern = search_pattern)\n     
   )\n      }\n    }\n\n    # search in variable labels?\n    if (any(seek %in% c(\"labels\", \"all\"))) {\n      var_labels <- insight::compact_character(unlist(lapply(\n        data,\n        attr,\n        which = \"label\",\n        exact = TRUE\n      )))\n      if (!is.null(var_labels) && length(var_labels)) {\n        found <- grepl(search_pattern, var_labels)\n        pos2 <- match(names(var_labels)[found], colnames(data))\n        # find in near distanc?\n        if (fuzzy) {\n          found <- .fuzzy_grep(x = var_labels, pattern = search_pattern)\n          if (length(found)) {\n            pos2 <- c(pos2, match(names(var_labels)[found], colnames(data)))\n          }\n        }\n      }\n    }\n\n    # search for pattern in value labels or levels?\n    if (any(seek %in% c(\"values\", \"levels\", \"all\"))) {\n      values <- insight::compact_list(lapply(data, function(i) {\n        l <- attr(i, \"labels\", exact = TRUE)\n        if (is.null(l) && is.factor(i)) {\n          levels(i)\n        } else {\n          names(l)\n        }\n      }))\n      if (!is.null(values) && length(values)) {\n        found <- vapply(\n          values,\n          function(i) any(grepl(search_pattern, i)),\n          logical(1)\n        )\n        pos3 <- match(names(found)[found], colnames(data))\n        # find in near distance\n        if (fuzzy) {\n          found <- vapply(\n            values,\n            function(i) {\n              length(.fuzzy_grep(x = i, pattern = search_pattern)) > 0\n            },\n            logical(1)\n          )\n          if (any(found)) {\n            pos3 <- c(pos3, match(names(found)[found], colnames(data)))\n          }\n        }\n      }\n    }\n    c(pos1, pos2, pos3)\n  }))\n\n  # clean up\n  pos <- unique(pos)\n\n  # variable labels of matching variables\n  var_labels <- vapply(\n    colnames(data[pos]),\n    function(i) {\n      l <- attr(data[[i]], \"label\", exact = TRUE)\n      if (is.null(l)) {\n        i\n      } else {\n 
       l\n      }\n    },\n    character(1)\n  )\n\n  out <- data.frame(\n    index = pos,\n    column = colnames(data)[pos],\n    labels = var_labels,\n    stringsAsFactors = FALSE\n  )\n  # no row names\n  rownames(out) <- NULL\n\n  class(out) <- c(\"data_seek\", \"data.frame\")\n  out\n}\n\n\n# methods ---------------------------------------------------------------------\n\n#' @export\nprint.data_seek <- function(x, ...) {\n  if (nrow(x) == 0) {\n    cat(\"No matches found.\\n\")\n  } else {\n    cat(insight::export_table(x, ...))\n  }\n}\n"
  },
  {
    "path": "R/data_select.R",
    "content": "#' @rdname extract_column_names\n#' @export\ndata_select <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  columns <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    allow_rename = TRUE,\n    verbose = FALSE\n  )\n\n  # save attributes\n  a <- attributes(data)\n\n  if (!length(columns) || is.null(columns)) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"No column names that matched the required search pattern were found.\"\n      )\n    }\n    return(NULL)\n  }\n\n  out <- data[columns]\n\n  # for named character vectors, we offer the service to directly rename the columns\n  if (!is.null(names(columns))) {\n    colnames(out) <- names(columns)\n  }\n\n  # add back attributes\n  out <- .replace_attrs(out, a)\n  out\n}\n"
  },
  {
    "path": "R/data_separate.R",
    "content": "#' @title Separate single variable into multiple variables\n#' @name data_separate\n#'\n#' @description\n#' Separates a single variable into multiple new variables.\n#'\n#' @param data A data frame.\n#' @param new_columns The names of the new columns, as character vector. If\n#' more than one variable was selected (in `select`), the new names are prefixed\n#' with the name of the original column. `new_columns` can also be a list of\n#' (named) character vectors when multiple variables should be separated. See\n#' 'Examples'.\n#' @param separator Separator between columns. Can be a character vector, which\n#' is then treated as regular expression, or a numeric vector that indicates at\n#' which positions the string values will be split.\n#' @param append Logical, if `FALSE` (default), removes original columns that\n#' were separated. If `TRUE`, all columns are preserved and the new columns are\n#' appended to the data frame.\n#' @param guess_columns If `new_columns` is not given, the required number of\n#' new columns is guessed based on the results of value splitting. For example,\n#' if a variable is split into three new columns, this will be considered as\n#' the required number of new columns, and columns are named `\"split_1\"`,\n#' `\"split_2\"` and `\"split_3\"`. When values from a variable are split into\n#' different amount of new columns, the `guess_column` can be either `\"mode\"`\n#' (number of new columns is based on the most common number of splits), `\"min\"`\n#' or `\"max\"` to use the minimum resp. maximum number of possible splits as\n#' required number of columns.\n#' @param fill How to deal with values that return fewer new columns after\n#' splitting? 
Can be `\"left\"` (fill missing columns from the left with `NA`),\n#' `\"right\"` (fill missing columns from the right with `NA`) or `\"value_left\"`\n#' or `\"value_right\"` to fill missing columns from left or right with the\n#' left-most or right-most values.\n#' @param extra How to deal with values that return too many new columns after\n#' splitting? Can be `\"drop_left\"` or `\"drop_right\"` to drop the left-most or\n#' right-most values, or `\"merge_left\"` or `\"merge_right\"` to merge the left-\n#' or right-most value together, and keeping all remaining values as is.\n#' @param merge_multiple Logical, if `TRUE` and more than one variable is selected\n#' for separating, new columns can be merged. Value pairs of all split variables\n#' are merged.\n#' @param merge_separator Separator string when `merge_multiple = TRUE`. Defines\n#' the string that is used to merge values together.\n#' @param convert_na Logical, if `TRUE`, character `\"NA\"` values are converted\n#' into real `NA` values.\n#' @param ... 
Currently not used.\n#' @inheritParams extract_column_names\n#'\n#' @seealso [`data_unite()`]\n#'\n#' @return A data frame with the newly created variable(s), or - when `append = TRUE` -\n#' `data` including new variables.\n#'\n#' @examples\n#' # simple case\n#' d <- data.frame(\n#'   x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n#'   stringsAsFactors = FALSE\n#' )\n#' d\n#' data_separate(d, new_columns = c(\"a\", \"b\", \"c\"))\n#'\n#' # guess number of columns\n#' d <- data.frame(\n#'   x = c(\"1.a.6\", NA, \"2.b.6.7\", \"3.c\", \"x.y.z\"),\n#'   stringsAsFactors = FALSE\n#' )\n#' d\n#' data_separate(d, guess_columns = \"mode\")\n#'\n#' data_separate(d, guess_columns = \"max\")\n#'\n#' # drop left-most column\n#' data_separate(d, guess_columns = \"mode\", extra = \"drop_left\")\n#'\n#' # merge right-most column\n#' data_separate(d, guess_columns = \"mode\", extra = \"merge_right\")\n#'\n#' # fill columns with fewer values with left-most values\n#' data_separate(d, guess_columns = \"mode\", fill = \"value_left\")\n#'\n#' # fill and merge\n#' data_separate(\n#'   d,\n#'   guess_columns = \"mode\",\n#'   fill = \"value_left\",\n#'   extra = \"merge_right\"\n#' )\n#'\n#' # multiple columns to split\n#' d <- data.frame(\n#'   x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n#'   y = c(\"x.y.z\", \"10.11.12\", \"m.n.o\"),\n#'   stringsAsFactors = FALSE\n#' )\n#' d\n#' # split two columns, default column names\n#' data_separate(d, guess_columns = \"mode\")\n#'\n#' # split into new named columns, repeating column names\n#' data_separate(d, new_columns = c(\"a\", \"b\", \"c\"))\n#'\n#' # split selected variable new columns\n#' data_separate(d, select = \"y\", new_columns = c(\"a\", \"b\", \"c\"))\n#'\n#' # merge multiple split columns\n#' data_separate(\n#'   d,\n#'   new_columns = c(\"a\", \"b\", \"c\"),\n#'   merge_multiple = TRUE\n#' )\n#'\n#' # merge multiple split columns\n#' data_separate(\n#'   d,\n#'   new_columns = c(\"a\", \"b\", \"c\"),\n#'   merge_multiple = TRUE,\n#'   
merge_separator = \"-\"\n#' )\n#'\n#' # separate multiple columns, give proper column names\n#' d_sep <- data.frame(\n#'   x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n#'   y = c(\"m.n.99.22\", \"77.f.g.34\", \"44.9\", NA),\n#'   stringsAsFactors = FALSE\n#' )\n#'\n#' data_separate(\n#'   d_sep,\n#'   select = c(\"x\", \"y\"),\n#'   new_columns = list(\n#'     x = c(\"A\", \"B\", \"C\"), # separate \"x\" into three columns\n#'     y = c(\"EE\", \"FF\", \"GG\", \"HH\") # separate \"y\" into four columns\n#'   ),\n#'   verbose = FALSE\n#' )\n#' @export\ndata_separate <- function(\n  data,\n  select = NULL,\n  new_columns = NULL,\n  separator = \"[^[:alnum:]]+\",\n  guess_columns = NULL,\n  merge_multiple = FALSE,\n  merge_separator = \"\",\n  fill = \"right\",\n  extra = \"drop_right\",\n  convert_na = TRUE,\n  exclude = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n) {\n  # we need at least one explicit choice for either `new_columns` or `guess_columns`\n  if (is.null(new_columns) && is.null(guess_columns)) {\n    insight::format_error(\n      \"Cannot separate values. 
Either `new_columns` or `guess_columns` must be provided.\"\n    )\n  }\n  # in case user did not provide names of new columns, we can try\n  # to guess number of columns per variable\n  guess_columns <- match.arg(guess_columns, choices = c(\"min\", \"max\", \"mode\"))\n\n  # make sure we have valid options for fill and extra\n  fill <- match.arg(\n    fill,\n    choices = c(\"left\", \"right\", \"value_left\", \"value_right\")\n  )\n  extra <- match.arg(\n    extra,\n    choices = c(\"drop_left\", \"drop_right\", \"merge_left\", \"merge_right\")\n  )\n\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # make new_columns as list, this works with single and multiple columns\n  if (!is.null(new_columns) && !is.list(new_columns)) {\n    new_columns <- rep(list(new_columns), times = length(select))\n    # if we have multiple columns that were separated, we avoid duplicated\n    # column names of created variables by appending name of original column\n    # however, we don't have duplicated column names when we merge them together\n    # so don't create new column names when \"merge_multiple\" is FALSE.\n    make_unique_colnames <- length(select) > 1 && !merge_multiple\n  } else {\n    # we don't want to create own unique column names when user explicitly\n    # provided column names as a list, i.e. 
column names for each separated\n    # variable\n    make_unique_colnames <- FALSE\n  }\n\n  # make sure list of new column names is named\n  if (!is.null(new_columns) && is.null(names(new_columns))) {\n    names(new_columns) <- select\n  }\n\n  # iterate columns that should be split\n  split_data <- lapply(select, function(sep_column) {\n    # do we have known number of columns?\n    if (is.null(new_columns)) {\n      n_columns <- NULL\n    } else {\n      n_columns <- length(new_columns[[sep_column]])\n    }\n\n    # make sure we have a character that we can split\n    x <- data[[sep_column]]\n    if (!is.character(x)) {\n      x <- as.character(x)\n    }\n\n    # separate column into multiple strings\n    if (is.numeric(separator)) {\n      maxlen <- max(nchar(x), na.rm = TRUE)\n      starts <- c(0, separator)\n      ends <- c(separator - 1, maxlen)\n      separated_columns <- lapply(seq_along(starts), function(i) {\n        substr(x, starts[i], ends[i])\n      })\n      separated_columns <- as.data.frame(\n        do.call(rbind, separated_columns),\n        stringsAsFactors = FALSE\n      )\n    } else {\n      separated_columns <- strsplit(x, separator, perl = TRUE)\n    }\n\n    # how many new columns do we need?\n    if (is.null(n_columns)) {\n      # lengths of all split strings\n      l <- lengths(separated_columns)\n      # but without NA values\n      l <- l[!vapply(l, function(i) all(is.na(i)), TRUE)]\n      # define number of new columns, based on user-choice\n      n_cols <- switch(\n        guess_columns,\n        min = min(l, na.rm = TRUE),\n        max = max(l, na.rm = TRUE),\n        mode = distribution_mode(l),\n      )\n      # tell user\n      if (verbose && insight::n_unique(l) != 1 && !is.numeric(separator)) {\n        insight::format_alert(paste0(\n          \"Column `\",\n          sep_column,\n          \"` had different number of values after splitting. 
Variable was split into \",\n          n_cols,\n          \" column\",\n          ifelse(n_cols > 1, \"s\", \"\"),\n          \".\"\n        ))\n      }\n    } else {\n      # else, if we know number of columns, use that number\n      n_cols <- n_columns\n    }\n\n    # main task here - fill or drop values for all columns\n    separated_columns <- tryCatch(\n      .fix_separated_columns(\n        separated_columns,\n        fill,\n        extra,\n        n_cols,\n        sep_column,\n        verbose\n      ),\n      error = function(e) NULL\n    )\n\n    # catch error\n    if (is.null(separated_columns)) {\n      insight::format_error(\n        \"Something went wrong. Probably the number of provided column names did not match number of newly created columns?\" # nolint\n      )\n    }\n\n    # bind separated columns into data frame and set column names\n    out <- as.data.frame(do.call(rbind, separated_columns))\n\n    # if no column names provided, use standard names\n    if (is.null(new_columns[[sep_column]])) {\n      new_column_names <- paste0(sep_column, \"_\", seq_along(out))\n    } else if (make_unique_colnames) {\n      # if we have multiple columns that were separated, we avoid duplicated\n      # column names of created variables by appending name of original column\n      new_column_names <- paste0(sep_column, \"_\", new_columns[[sep_column]])\n    } else {\n      new_column_names <- new_columns[[sep_column]]\n    }\n\n    colnames(out) <- new_column_names\n    out\n  })\n\n  # any split performed?\n  if (all(lengths(split_data) == 1)) {\n    if (verbose) {\n      insight::format_alert(\n        \"Separator probably not found. No values were split. 
Returning original data.\"\n      )\n    }\n    return(data)\n  }\n\n  # final preparation, bind or merge columns, make unique columm names\n  if (isTRUE(merge_multiple) && length(split_data) > 1) {\n    # we merge all split columns, which are currently saved as list\n    # of data frames, together into one data frame\n    for (i in 2:length(split_data)) {\n      for (j in seq_along(split_data[[1]])) {\n        split_data[[1]][[j]] <- gsub(\n          \" \",\n          \"\",\n          paste(\n            split_data[[1]][[j]],\n            split_data[[i]][[j]],\n            sep = merge_separator\n          ),\n          fixed = TRUE\n        )\n      }\n    }\n    split_data <- split_data[[1]]\n  } else {\n    # bind all columns\n    split_data <- do.call(cbind, split_data)\n  }\n\n  # convert \"NA\" strings into real NA?\n  if (convert_na) {\n    split_data[] <- lapply(split_data, function(i) {\n      i[i == \"NA\"] <- NA_character_\n      i\n    })\n  }\n\n  data <- cbind(data, split_data)\n  if (!isTRUE(append)) {\n    data[select] <- NULL\n  }\n\n  # fin\n  data\n}\n\n\n#' @keywords internal\n.fix_separated_columns <- function(\n  separated_columns,\n  fill,\n  extra,\n  n_cols,\n  sep_column,\n  verbose = TRUE\n) {\n  warn_extra <- warn_fill <- FALSE\n  for (sc in seq_along(separated_columns)) {\n    i <- separated_columns[[sc]]\n    # determine number of values in separated column\n    n_values <- length(i)\n    if (all(is.na(i))) {\n      # we have NA values - so fill everything with NA\n      out <- rep(NA_character_, times = n_cols)\n    } else if (n_values > n_cols) {\n      # we have more values than required - drop extra columns\n      out <- switch(\n        extra,\n        drop_left = i[(n_values - n_cols + 1):n_values],\n        drop_right = i[1:n_cols],\n        merge_left = {\n          tmp <- paste(i[1:(n_values - n_cols + 1)], collapse = \" \")\n          c(tmp, i[(n_values - n_cols + 2):n_values])\n        },\n        {\n          tmp <- 
i[1:(n_cols - 1)]\n          c(tmp, paste(i[n_cols:n_values], collapse = \" \"))\n        }\n      )\n      warn_extra <- TRUE\n    } else if (n_values < n_cols) {\n      # we have fewer values than required - fill columns\n      out <- switch(\n        fill,\n        left = c(rep(NA_character_, times = n_cols - n_values), i),\n        right = c(i, rep(NA_character_, times = n_cols - n_values)),\n        value_left = c(rep(i[1], times = n_cols - n_values), i),\n        c(i, rep(i[length(i)], times = n_cols - n_values))\n      )\n      warn_fill <- TRUE\n    } else {\n      out <- i\n    }\n    separated_columns[[sc]] <- out\n  }\n\n  if (verbose) {\n    if (warn_extra) {\n      insight::format_alert(paste0(\n        \"`\",\n        sep_column,\n        \"`\",\n        \" returned more columns than expected after splitting. \",\n        switch(\n          extra,\n          drop_left = \"Left-most columns have been dropped.\",\n          drop_right = \"Right-most columns have been dropped.\",\n          merge_left = \"Left-most columns have been merged together.\",\n          merge_right = \"Right-most columns have been merged together.\"\n        )\n      ))\n    }\n    if (warn_fill) {\n      insight::format_alert(paste0(\n        \"`\",\n        sep_column,\n        \"`\",\n        \"returned fewer columns than expected after splitting. \",\n        switch(\n          fill,\n          left = \"Left-most columns were filled with `NA`.\",\n          right = \"Right-most columns were filled with `NA`.\",\n          value_left = \"Left-most columns were filled with first value.\",\n          value_right = \"Right-most columns were filled with last value.\"\n        )\n      ))\n    }\n  }\n\n  separated_columns\n}\n"
  },
  {
    "path": "R/data_summary.R",
    "content": "#' @title Summarize data\n#' @name data_summary\n#'\n#' @description This function can be used to compute summary statistics for a\n#' data frame or a matrix.\n#'\n#' @param x A (grouped) data frame.\n#' @param by Optional character string, indicating the names of one or more\n#' variables in the data frame. If supplied, the data will be split by these\n#' variables and summary statistics will be computed for each group.\n#' @param remove_na Logical. If `TRUE`, missing values are omitted from the\n#' grouping variable. If `FALSE` (default), missing values are included as a\n#' level in the grouping variable.\n#' @param suffix Optional, suffixes to be added to the new variable names,\n#' especially useful when a function returns several values (e.g. `quantile()`).\n#' Can be:\n#' * a character vector: all expressions in `...` must return the same number\n#'    of values as elements in `suffix`.\n#' * a list of named character vectors: the names of elements in `suffix` must\n#'    match the names of the expressions. It is also allowed to specify suffixes\n#'    for selected expressions only.\n#'\n#' The new column names are a combination of the left-hand side (i.e.,\n#' the name) of the expression and the related suffixes. If `suffix = NULL` (the\n#' default), and a summary expression returns multiple values, either the names\n#' of the returned values (if any) or automatically numbered suffixes such as\n#' `_1`, `_2`, etc. are used. See 'Examples'.\n#' @param ... One or more named expressions that define the new variable name\n#' and the function to compute the summary statistic. Example:\n#' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided\n#' as a character string, e.g. `\"mean_sepal_width = mean(Sepal.Width)\"`. 
The\n#' summary function `n()` can be used to count the number of observations.\n#'\n#' @return A data frame with the requested summary statistics.\n#'\n#' @examples\n#' data(iris)\n#' data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\n#' data_summary(\n#'   iris,\n#'   MW = mean(Sepal.Width),\n#'   SD = sd(Sepal.Width),\n#'   by = \"Species\"\n#' )\n#'\n#' # same as\n#' d <- data_group(iris, \"Species\")\n#' data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\n#'\n#' # multiple groups\n#' data(mtcars)\n#' data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c(\"am\", \"gear\"))\n#'\n#' # expressions can also be supplied as character strings\n#' data_summary(mtcars, \"MW = mean(mpg)\", \"SD = sd(mpg)\", by = c(\"am\", \"gear\"))\n#'\n#' # count observations within groups\n#' data_summary(mtcars, observations = n(), by = c(\"am\", \"gear\"))\n#'\n#' # first and last observations of \"mpg\" within groups\n#' data_summary(\n#'   mtcars,\n#'   first = mpg[1],\n#'   last = mpg[length(mpg)],\n#'   by = c(\"am\", \"gear\")\n#' )\n#'\n#' # allow more than one-column-summaries for expressions\n#' d <- data.frame(\n#'   x = rnorm(100, 1, 1),\n#'   y = rnorm(100, 2, 2),\n#'   groups = rep(1:4, each = 25)\n#' )\n#'\n#' # since we have multiple columns for one expression, the names of the\n#' # returned summary results are used as suffix by default\n#' data_summary(\n#'   d,\n#'   quant_x = quantile(x, c(0.25, 0.75)),\n#'   mean_x = mean(x),\n#'   quant_y = quantile(y, c(0.25, 0.5, 0.75))\n#' )\n#'\n#' # if a summary function, like `fivenum()`, returns no named vector, suffixes\n#' # are automatically numbered\n#' data_summary(\n#'   d,\n#'   quant_x = quantile(x, c(0.25, 0.75)),\n#'   mean_x = mean(x),\n#'   fivenum_y = fivenum(y)\n#' )\n#'\n#' # specify column suffix for expressions, matching by names\n#' data_summary(\n#'   d,\n#'   quant_x = quantile(x, c(0.25, 0.75)),\n#'   mean_x = mean(x),\n#'   quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n#'   
suffix = list(quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n#' )\n#'\n#' # name multiple expression suffixes, grouped by variable\n#' data_summary(\n#'   d,\n#'   quant_x = quantile(x, c(0.25, 0.75)),\n#'   mean_x = mean(x),\n#'   quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n#'   suffix = list(quant_x = c(\"Q1\", \"Q3\"), quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\")),\n#'   by = \"groups\"\n#' )\n#'\n#' @export\ndata_summary <- function(x, ...) {\n  UseMethod(\"data_summary\")\n}\n\n\n#' @export\ndata_summary.matrix <- function(\n  x,\n  ...,\n  by = NULL,\n  remove_na = FALSE,\n  suffix = NULL\n) {\n  data_summary(\n    as.data.frame(x),\n    ...,\n    by = by,\n    remove_na = remove_na,\n    suffix = suffix\n  )\n}\n\n\n#' @export\ndata_summary.default <- function(x, ...) {\n  insight::format_error(\n    \"`data_summary()` only works for (grouped) data frames and matrices.\"\n  )\n}\n\n\n#' @rdname data_summary\n#' @export\ndata_summary.data.frame <- function(\n  x,\n  ...,\n  by = NULL,\n  remove_na = FALSE,\n  suffix = NULL\n) {\n  dots <- eval(substitute(alist(...)))\n\n  # do we have any expression at all?\n  if (length(dots) == 0) {\n    insight::format_error(\n      \"No expressions for calculating summary statistics provided.\"\n    )\n  }\n\n  if (is.null(by)) {\n    # when we have no grouping, just compute a one-row summary\n    summarise <- .process_datasummary_dots(dots, x, suffix)\n    # coerce to data frame\n    out <- as.data.frame(t(summarise))\n    colnames(out) <- names(summarise)\n  } else {\n    # sanity check - is \"by\" a character string?\n    if (!is.character(by)) {\n      insight::format_error(\n        \"Argument `by` must be a character string indicating the name of variables in the data.\"\n      )\n    }\n    # is \"by\" in the data?\n    if (!all(by %in% colnames(x))) {\n      by_not_found <- by[!by %in% colnames(x)]\n      insight::format_error(\n        paste0(\n          \"Variable\",\n          ifelse(length(by_not_found) > 1, \"s \", \" 
\"),\n          text_concatenate(by_not_found, enclose = \"\\\"\"),\n          \" not found in the data.\"\n        ),\n        .misspelled_string(colnames(x), by_not_found, \"Possibly misspelled?\")\n      )\n    }\n    # split data, add NA levels, if requested\n    l <- lapply(x[by], function(i) {\n      if (remove_na || !anyNA(i)) {\n        i\n      } else {\n        addNA(i)\n      }\n    })\n    split_data <- split(x, l, drop = TRUE)\n    out <- lapply(split_data, function(s) {\n      # no data for combination? Return NULL\n      if (nrow(s) == 0) {\n        return(NULL)\n      }\n      # summarize data\n      summarise <- .process_datasummary_dots(dots, s, suffix)\n      # coerce to data frame\n      summarised_data <- as.data.frame(t(summarise))\n      # bind grouping-variables and values\n      summarised_data <- cbind(s[1, by], summarised_data)\n      # make sure we have proper column names\n      colnames(summarised_data) <- c(by, names(summarise))\n      summarised_data\n    })\n    # check for correct number of columns. If one expression returns different\n    # number of values (which now means, we have different number of columns\n    # to bind) for each group, tell user\n    if (!all(lengths(out) == lengths(out)[1])) {\n      insight::format_error(\n        \"Each expression must return the same number of values for each group. 
Some of the expressions seem to return varying numbers of values.\"\n      )\n    }\n    out <- do.call(rbind, out)\n  }\n  # sort data\n  out <- data_arrange(out, select = by)\n  # data attributes\n  class(out) <- c(\"dw_data_summary\", \"data.frame\")\n  rownames(out) <- NULL\n  out\n}\n\n\n#' @export\ndata_summary.grouped_df <- function(\n  x,\n  ...,\n  by = NULL,\n  remove_na = FALSE,\n  suffix = NULL\n) {\n  # extract group variables\n  grps <- attr(x, \"groups\", exact = TRUE)\n  group_variables <- data_remove(grps, \".rows\")\n  # if \"by\" is not supplied, use group variables\n  if (is.null(by)) {\n    by <- colnames(group_variables)\n  }\n  # remove information specific to grouped df's\n  attr(x, \"groups\") <- NULL\n  class(x) <- \"data.frame\"\n  data_summary(x, ..., by = by, remove_na = remove_na, suffix = suffix)\n}\n\n\n# helper -----------------------------------------------------------------------\n\n.process_datasummary_dots <- function(dots, data, suffix = NULL) {\n  out <- NULL\n  if (length(dots)) {\n    # we check for character vector of expressions, in which case\n    # \"dots\" should be unnamed\n    if (is.null(names(dots))) {\n      # if we have multiple strings, concatenate them to a character vector\n      # and put it into a list...\n      if (length(dots) > 1) {\n        if (all(vapply(dots, is.character, logical(1)))) {\n          dots <- list(unlist(dots))\n        } else {\n          insight::format_error(\n            \"You cannot mix string and literal representation of expressions.\"\n          )\n        }\n      }\n      # expression is given as character string, e.g.\n      # a <- \"mean_sepwid = mean(Sepal.Width)\"\n      # data_summary(iris, a, by = \"Species\")\n      # or as character vector, e.g.\n      # data_summary(iris, c(\"var_a = mean(Sepal.Width)\", \"var_b = sd(Sepal.Width)\"))\n      character_symbol <- tryCatch(.dynEval(dots[[1]]), error = function(e) {\n        NULL\n      })\n      # do we have a character 
vector? Then we can proceed\n      if (is.character(character_symbol)) {\n        dots <- lapply(character_symbol, function(s) {\n          # turn value from character vector into expression\n          str2lang(.dynEval(s))\n        })\n        names(dots) <- vapply(\n          dots,\n          function(n) insight::safe_deparse(n[[2]]),\n          character(1)\n        )\n      }\n    }\n\n    # sanity check: check the input for the `suffix` argument\n    # `suffix` can be NULL, or must be a (named) list\n    if (!is.null(suffix)) {\n      # if `suffix` is a character vector, we transform it into a list,\n      # matching the names of the expressions\n      if (is.character(suffix)) {\n        suffix <- rep(list(suffix), length(dots))\n        names(suffix) <- names(dots)\n      }\n      # no list? error\n      if (!is.list(suffix)) {\n        insight::format_error(\n          \"Argument `suffix` must be a list of (named) character vectors, where the names match the names of the expressions, e.g.:\",\n          paste0(\n            \"`suffix = list(\",\n            names(dots)[1],\n            \" = c(\\\"_suffix1\\\", \\\"_suffix2\\\")`.\"\n          )\n        )\n      }\n      # not all elements named? error\n      if (!length(which(nzchar(names(suffix), keepNA = TRUE)))) {\n        insight::format_error(\"All elements of `suffix` must have names.\")\n      }\n      # names of suffix do not match names of expressions? error\n      if (!all(names(suffix) %in% names(dots))) {\n        wrong_name <- which(!names(suffix) %in% names(dots))[1]\n        insight::format_error(\n          paste0(\n            \"Names of `suffix` must match the names of the expressions. Suffix `\",\n            names(suffix)[wrong_name],\n            \"` has no corresponding expression.\"\n          )\n        )\n      }\n      # identical suffixes for one expression? 
error\n      identical_suffix <- vapply(\n        suffix,\n        function(i) insight::n_unique(i) != length(i),\n        logical(1)\n      )\n      if (any(identical_suffix)) {\n        insight::format_error(\n          paste0(\n            \"All suffixes for a single expression must be unique. Suffix for element `\",\n            names(identical_suffix)[which(identical_suffix)][1],\n            \"` has duplicate values.\"\n          )\n        )\n      }\n    }\n\n    out <- lapply(seq_along(dots), function(i) {\n      new_variable <- .get_new_dots_variable(dots, i, data)\n      # check special case here - we want bayestestR::ci to work with\n      # data summary, to easily create CIs for, say, posterior draws\n      if (inherits(new_variable, c(\"bayestestR_ci\", \"bayestestR_eti\"))) {\n        stats::setNames(new_variable, c(\"CI\", \"CI_low\", \"CI_high\"))\n      } else {\n        # init\n        current_suffix <- NULL\n        # find matches and set use suffix if found\n        matching_names <- which(names(suffix) == names(dots)[i])\n        # either use suffixes based on matching names, or try to extract\n        # names from the returned summary expression (saved in \"new_variable\"),\n        # if the summary function returned a named vector\n        if (length(matching_names) > 0) {\n          current_suffix <- suffix[[matching_names]]\n        } else if (\n          length(new_variable) > 1 &&\n            all(nzchar(names(new_variable), keepNA = TRUE))\n        ) {\n          current_suffix <- names(new_variable)\n        }\n        # if we don't have suffixes for multiple columns, but expression\n        # returns multiple columns, we get NA column names - we use\n        # automatically numbered suffixes in this case\n        if (is.null(current_suffix) && length(new_variable) > 1) {\n          current_suffix <- paste0(\"_\", seq_along(new_variable))\n        }\n\n        # if number of suffixes does not match the number of returned values\n       
 # by the expression, error\n        if (\n          !is.null(current_suffix) &&\n            length(current_suffix) != length(new_variable)\n        ) {\n          insight::format_error(\n            paste0(\n              \"Argument `suffix` must have the same length as the result of the corresponding summary expression. `suffix` has \",\n              length(current_suffix),\n              \" elements (\",\n              text_concatenate(current_suffix, enclose = \"`\"),\n              \") for the expression `\",\n              insight::safe_deparse(dots[[i]]),\n              \"`, which returned \",\n              length(new_variable),\n              \" values.\"\n            )\n          )\n        }\n        stats::setNames(new_variable, paste0(names(dots)[i], current_suffix))\n      }\n    })\n  }\n\n  unlist(out)\n}\n\n\n# methods ----------------------------------------------------------------------\n\n#' @export\nprint.dw_data_summary <- function(x, ...) {\n  if (nrow(x) == 0) {\n    cat(\"No matches found.\\n\")\n  } else {\n    if (all(c(\"CI\", \"CI_low\", \"CI_high\") %in% colnames(x))) {\n      ci <- insight::format_table(x[c(\"CI\", \"CI_low\", \"CI_high\")], ...)\n      x$CI <- x$CI_low <- x$CI_high <- NULL\n      x <- cbind(x, ci)\n    }\n    cat(insight::export_table(x, missing = \"<NA>\", ...))\n  }\n}\n"
  },
  {
    "path": "R/data_tabulate.R",
    "content": "#' @title Create frequency and crosstables of variables\n#' @name data_tabulate\n#'\n#' @description This function creates frequency or crosstables of variables,\n#' including the number of levels/values as well as the distribution of raw,\n#' valid and cumulative percentages. For crosstables, row, column and cell\n#' percentages can be calculated.\n#'\n#' @param x A (grouped) data frame, a vector or factor.\n#' @param by Optional vector or factor. If supplied, a crosstable is created.\n#' If `x` is a data frame, `by` can also be a character string indicating the\n#' name of a variable in `x`.\n#' @param drop_levels Logical, if `FALSE`, factor levels that do not occur in\n#' the data are included in the table (with frequency of zero), else unused\n#' factor levels are dropped from the frequency table.\n#' @param name Optional character string, which includes the name that is used\n#' for printing.\n#' @param remove_na Logical, if `FALSE`, missing values are included in the\n#' frequency or crosstable, else missing values are omitted. Note that the\n#' default for the `as.table()` method is `remove_na = TRUE`, so that missing\n#' values are not included in the returned table, which makes more sense for\n#' post-processing of the table, e.g. using `chisq.test()`.\n#' @param collapse Logical, if `TRUE` collapses multiple tables into one larger\n#' table for printing. This affects only printing, not the returned object.\n#' @param weights Optional numeric vector of weights. Must be of the same length\n#' as `x`. If `weights` is supplied, weighted frequencies are calculated.\n#' @param proportions Optional character string, indicating the type of\n#' percentages to be calculated. Only applies to crosstables, i.e. when `by` is\n#' not `NULL`. 
Can be `\"row\"` (row percentages), `\"column\"` (column percentages)\n#' or `\"full\"` (to calculate relative frequencies for the full table).\n#' @param big_mark Optional character string, indicating the big mark that is\n#' used for large numbers. If `NULL` (default), a big mark is added automatically for\n#' large numbers (i.e. numbers with more than 5 digits). If you want to remove\n#' the big mark, set `big_mark = \"\"`.\n#' @param object An object returned by `data_tabulate()`.\n#' @param format String, indicating the output format. Can be `\"markdown\"`\n#' `\"html\"`, or `\"tt\"`. `format = \"html\"` create an HTML table using the *gt*\n#' package. `format = \"tt\"` creates a `tinytable` object, which is either\n#' printed as markdown or HTML table, depending on the environment. See\n#' [`insight::export_table()`] for details.\n#' @param verbose Toggle warnings and messages.\n#' @param ... not used.\n#' @inheritParams extract_column_names\n#'\n#' @details\n#' There is an `as.data.frame()` method, to return the frequency tables as a\n#' data frame. The structure of the returned object is a nested data frame,\n#' where the first column contains name of the variable for which frequencies\n#' were calculated, and the second column is a list column that contains the\n#' frequency tables as data frame. See [as.table.datawizard_table].\n#'\n#' There is also an `as.table()` method, which returns a table object with the\n#' frequencies of the variable. This is useful for further statistical analysis,\n#' e.g. for using `chisq.test()` on the frequency table. See\n#' [as.table.datawizard_table].\n#'\n#' @section Crosstables:\n#' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`\n#' (missing) values by default. The first column indicates values of `x`, the\n#' first row indicates values of `by` (including missing values). 
The last row\n#' and column contain the total frequencies for each row and column, respectively.\n#' Setting `remove_na = FALSE` will omit missing values from the crosstable.\n#' Setting `proportions` to `\"row\"` or `\"column\"` will add row or column\n#' percentages. Setting `proportions` to `\"full\"` will add relative frequencies\n#' for the full table.\n#'\n#' @note\n#' There are `print_html()` and `print_md()` methods available for printing\n#' frequency or crosstables in HTML and markdown format, e.g.\n#' `print_html(data_tabulate(x))`. The `print()` method for text outputs passes\n#' arguments in `...` to [`insight::export_table()`].\n#'\n#' @return A data frame, or a list of data frames, with one frequency table\n#' as data frame per variable.\n#'\n#' @seealso [as.prop.table]\n#'\n#' @examplesIf requireNamespace(\"poorman\")\n#' # frequency tables -------\n#' # ------------------------\n#' data(efc)\n#'\n#' # vector/factor\n#' data_tabulate(efc$c172code)\n#'\n#' # drop missing values\n#' data_tabulate(efc$c172code, remove_na = TRUE)\n#'\n#' # data frame\n#' data_tabulate(efc, c(\"e42dep\", \"c172code\"))\n#'\n#' # grouped data frame\n#' suppressPackageStartupMessages(library(poorman, quietly = TRUE))\n#' efc %>%\n#'   group_by(c172code) %>%\n#'   data_tabulate(\"e16sex\")\n#'\n#' # collapse tables\n#' efc %>%\n#'   group_by(c172code) %>%\n#'   data_tabulate(\"e16sex\", collapse = TRUE)\n#'\n#' # for larger N's (> 100000), a big mark is automatically added\n#' set.seed(123)\n#' x <- sample(1:3, 1e6, TRUE)\n#' data_tabulate(x, name = \"Large Number\")\n#'\n#' # to remove the big mark, use \"print(..., big_mark = \"\")\"\n#' print(data_tabulate(x), big_mark = \"\")\n#'\n#' # weighted frequencies\n#' set.seed(123)\n#' efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n#' data_tabulate(efc$e42dep, weights = efc$weights)\n#'\n#' # crosstables ------\n#' # ------------------\n#'\n#' # add some missing values\n#' set.seed(123)\n#' 
efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n#'\n#' data_tabulate(efc, \"c172code\", by = \"e16sex\")\n#'\n#' # add row and column percentages\n#' data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"row\")\n#' data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\")\n#'\n#' # omit missing values\n#' data_tabulate(\n#'   efc$c172code,\n#'   by = efc$e16sex,\n#'   proportions = \"column\",\n#'   remove_na = TRUE\n#' )\n#'\n#' # round percentages\n#' out <- data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\")\n#' print(out, digits = 0)\n#' @export\ndata_tabulate <- function(x, ...) {\n  UseMethod(\"data_tabulate\")\n}\n\n\n#' @rdname data_tabulate\n#' @export\ndata_tabulate.default <- function(\n  x,\n  by = NULL,\n  drop_levels = FALSE,\n  weights = NULL,\n  remove_na = FALSE,\n  proportions = NULL,\n  name = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # save label attribute, before it gets lost...\n  var_label <- attr(x, \"label\", exact = TRUE)\n\n  # save and fix variable name, check for grouping variable\n  obj_name <- tryCatch(\n    insight::safe_deparse(substitute(x)),\n    error = function(e) NULL\n  )\n  if (identical(obj_name, \"x[[i]]\")) {\n    obj_name <- name\n  }\n  group_variable <- list(...)$group_variable\n\n  # check whether levels not present in data should be shown or not\n  if (is.factor(x) && isTRUE(drop_levels)) {\n    x <- droplevels(x)\n  }\n\n  # validate \"weights\"\n  weights <- .validate_table_weights(\n    weights,\n    x,\n    weights_expression = insight::safe_deparse(substitute(weights))\n  )\n\n  # we go into another function for crosstables here...\n  if (!is.null(by)) {\n    by <- .validate_by(by, x)\n    return(.crosstable(\n      x,\n      by = by,\n      weights = weights,\n      remove_na = remove_na,\n      proportions = proportions,\n      obj_name = obj_name,\n      group_variable = group_variable\n    ))\n  }\n\n  # frequency table\n  if (is.null(weights)) {\n    if 
(remove_na) {\n      # we have a `.default` and a `.data.frame` method for `data_tabulate()`.\n      # since this is the default, `x` can be an object which cannot be used\n      # with `table()`, that's why we add `tryCatch()` here. Below we give an\n      # informative error message for non-supported objects.\n      freq_table <- tryCatch(table(x), error = function(e) NULL)\n    } else {\n      freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)\n    }\n  } else if (remove_na) {\n    # weighted frequency table, excluding NA\n    freq_table <- tryCatch(\n      stats::xtabs(\n        weights ~ x,\n        data = data.frame(weights = weights, x = x),\n        na.action = stats::na.omit,\n        addNA = FALSE\n      ),\n      error = function(e) NULL\n    )\n  } else {\n    # weighted frequency table, including NA\n    freq_table <- tryCatch(\n      stats::xtabs(\n        weights ~ x,\n        data = data.frame(weights = weights, x = addNA(x)),\n        na.action = stats::na.pass,\n        addNA = TRUE\n      ),\n      error = function(e) NULL\n    )\n  }\n\n  if (is.null(freq_table)) {\n    insight::format_warning(paste0(\n      \"Can't compute frequency tables for objects of class `\",\n      class(x)[1],\n      \"`.\"\n    ))\n    return(NULL)\n  }\n\n  # create data frame with freq table and cumulative percentages etc.\n  out <- data_rename(\n    data.frame(freq_table, stringsAsFactors = FALSE),\n    replacement = c(\"Value\", \"N\")\n  )\n\n  # we want to round N for weighted frequencies\n  if (!is.null(weights)) {\n    out$N <- round(out$N)\n  }\n\n  out$`Raw %` <- 100 * out$N / sum(out$N)\n  # if we have missing values, we add a row with NA\n  if (remove_na) {\n    out$`Valid %` <- 100 * out$N / sum(out$N)\n    valid_n <- sum(out$N, na.rm = TRUE)\n  } else {\n    out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)\n    valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)\n  }\n  out$`Cumulative %` <- cumsum(out$`Valid %`)\n\n 
 # add information about variable/group names\n  if (!is.null(obj_name)) {\n    if (is.null(group_variable)) {\n      var_info <- data.frame(Variable = obj_name, stringsAsFactors = FALSE)\n    } else {\n      var_info <- data.frame(\n        Variable = obj_name,\n        Group = toString(lapply(colnames(group_variable), function(i) {\n          sprintf(\"%s (%s)\", i, group_variable[[i]])\n        })),\n        stringsAsFactors = FALSE\n      )\n    }\n    out <- cbind(var_info, out)\n  }\n\n  # save information\n  attr(out, \"type\") <- .variable_type(x)\n  attr(out, \"varname\") <- name\n  attr(out, \"label\") <- var_label\n  attr(out, \"object\") <- obj_name\n  attr(out, \"group_variable\") <- group_variable\n  attr(out, \"duplicate_varnames\") <- duplicated(out$Variable)\n  attr(out, \"weights\") <- weights\n\n  attr(out, \"total_n\") <- sum(out$N, na.rm = TRUE)\n  attr(out, \"valid_n\") <- valid_n\n\n  class(out) <- c(\"datawizard_table\", \"data.frame\")\n\n  out\n}\n\n\n#' @rdname data_tabulate\n#' @export\ndata_tabulate.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  by = NULL,\n  drop_levels = FALSE,\n  weights = NULL,\n  remove_na = FALSE,\n  proportions = NULL,\n  collapse = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # validate \"by\"\n  by <- .validate_by(by, x)\n  # validate \"weights\"\n  weights <- .validate_table_weights(weights, x)\n\n  out <- lapply(select, function(i) {\n    data_tabulate(\n      x[[i]],\n      by = by,\n      proportions = proportions,\n      drop_levels = drop_levels,\n      weights = weights,\n      remove_na = remove_na,\n      name = i,\n      verbose = verbose,\n      ...\n    )\n  })\n\n  if (is.null(by)) {\n    class(out) <- c(\"datawizard_tables\", \"list\")\n  } else {\n    class(out) <- 
c(\"datawizard_crosstabs\", \"list\")\n  }\n  attr(out, \"collapse\") <- isTRUE(collapse)\n  attr(out, \"is_weighted\") <- !is.null(weights)\n\n  out\n}\n\n\n#' @export\ndata_tabulate.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  by = NULL,\n  proportions = NULL,\n  drop_levels = FALSE,\n  weights = NULL,\n  remove_na = FALSE,\n  collapse = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  grps <- attr(x, \"groups\", exact = TRUE)\n  group_variables <- data_remove(grps, \".rows\")\n  grps <- grps[[\".rows\"]]\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  x <- as.data.frame(x)\n\n  out <- list()\n  for (i in seq_along(grps)) {\n    rows <- grps[[i]]\n    # save information about grouping factors\n    if (is.null(group_variables)) {\n      group_variable <- NULL\n    } else {\n      group_variable <- group_variables[i, , drop = FALSE]\n    }\n    out <- c(\n      out,\n      data_tabulate(\n        data_filter(x, rows),\n        select = select,\n        exclude = exclude,\n        ignore_case = ignore_case,\n        verbose = verbose,\n        drop_levels = drop_levels,\n        weights = weights,\n        remove_na = remove_na,\n        by = by,\n        proportions = proportions,\n        group_variable = group_variable,\n        ...\n      )\n    )\n  }\n  if (is.null(by)) {\n    class(out) <- c(\"datawizard_tables\", \"list\")\n  } else {\n    class(out) <- c(\"datawizard_crosstabs\", \"list\")\n  }\n  attr(out, \"collapse\") <- isTRUE(collapse)\n  attr(out, \"is_weighted\") <- !is.null(weights)\n\n  out\n}\n\n\n# methods --------------------\n\n#' @importFrom insight print_html\n#' @export\ninsight::print_html\n\n\n#' @importFrom insight print_md\n#' @export\ninsight::print_md\n\n\n#' @importFrom insight display\n#' @export\ninsight::display\n\n#' Convert a crosstable to a frequency or a 
propensity table\n#'\n#' @description\n#' `as.prop.table()` is an S3 generic. It can be used on objects of class\n#' `datawizard_crosstab` created by `data_tabulate()` when it was run with the\n#' arguments `by` and `proportions`.\n#'\n#' @param x An object created by `data_tabulate()`. It must be of class\n#' `datawizard_crosstab` for `as.prop.table()`.\n#' @param simplify Logical, if `TRUE`, the returned table is simplified to a\n#' single table object if there is only one frequency or contingency table\n#' input. Else, always for multiple table inputs or when `simplify = FALSE`, a\n#' list of tables is returned. This is only relevant for the `as.table()`\n#' methods. To ensure consistent output, the default is `FALSE`.\n#' @inheritParams data_tabulate\n#'\n#' @export\n#' @seealso [data_tabulate]\n#'\n#' @examples\n#' data(efc)\n#'\n#' # Some cross tabulation\n#' cross <- data_tabulate(efc, select = \"e42dep\", by = \"c172code\", proportions = \"row\")\n#' cross\n#'\n#' # Convert to a propensity table\n#' as.prop.table(cross)\n#'\n#' # Convert to data.frame\n#' result <- data_tabulate(efc, \"c172code\", by = \"e16sex\")\n#' as.data.frame(result)\n#' as.data.frame(result)$table\n#' as.data.frame(result, add_total = TRUE)$table\n#'\n#' # Convert to a table that can be passed to chisq.test()\n#'\n#' out <- data_tabulate(efc, \"c172code\", by = \"e16sex\")\n#' # we need to simplify the output, else we get a list of tables\n#' tbl <- as.table(out, simplify = TRUE)\n#' tbl\n#' suppressWarnings(chisq.test(tbl))\n#'\n#' # apply chisq.test to each table\n#' out <- data_tabulate(efc, c(\"c172code\", \"e16sex\"))\n#' suppressWarnings(lapply(as.table(out), chisq.test))\n#'\n#' # can also handle grouped data frames\n#' d <- data_group(mtcars, \"am\")\n#' x <- data_tabulate(d, \"cyl\", by = \"gear\")\n#' as.table(x)\nas.prop.table <- function(x, ...) 
{\n  UseMethod(\"as.prop.table\")\n}\n\n#' @rdname as.prop.table\n#' @export\nas.prop.table.datawizard_crosstab <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # sanity check - the `.data.frame` method  returns a list, but not the\n  # default method\n  if (!is.data.frame(x)) {\n    x <- x[[1]]\n  }\n  prop_table <- attributes(x)$prop_table\n\n  if (is.null(prop_table)) {\n    insight::format_warning(\"No proportions available.\")\n    return(NULL)\n  }\n\n  if (remove_na) {\n    if (\n      verbose &&\n        (\"NA\" %in% colnames(prop_table) || \"NA\" %in% rownames(prop_table))\n    ) {\n      insight::format_alert(\"Removing NA values from frequency table.\")\n    }\n    if (!is.null(prop_table[[\"NA\"]])) {\n      prop_table[[\"NA\"]] <- NULL\n    }\n    if (\"NA\" %in% rownames(prop_table)) {\n      prop_table <- prop_table[rownames(prop_table) != \"NA\", ]\n    }\n  }\n  # coerce to table\n  result <- as.table(as.matrix(prop_table))\n  # if we don't want to simplify the table, we wrap it into a list\n  if (!simplify) {\n    result <- list(result)\n  }\n\n  result\n}\n\n#' @export\nas.prop.table.datawizard_crosstabs <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # only show message once we set `verbose = FALSE` in the lapply()\n  if (remove_na && verbose) {\n    prop_table <- attributes(x[[1]])$prop_table\n    if (\"NA\" %in% colnames(prop_table) || \"NA\" %in% rownames(prop_table)) {\n      insight::format_alert(\"Removing NA values from frequency table.\")\n    }\n  }\n\n  out <- insight::compact_list(lapply(\n    x,\n    as.prop.table.datawizard_crosstab,\n    remove_na = remove_na,\n    simplify = TRUE,\n    verbose = FALSE,\n    ...\n  ))\n\n  # if no proportions found, return NULL\n  if (!length(out)) {\n    return(NULL)\n  }\n\n  # if only one table is returned, \"unlist\"\n  if (length(out) == 1 && simplify) {\n    out <- out[[1]]\n  }\n  out\n}\n\n\n# 
as.data.frame --------------------\n\n#' @rdname as.prop.table\n#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and\n#' column with the total N values are added to the data frame. `add_total` has\n#' no effect in `as.data.frame()` for simple frequency tables.\n#' @inheritParams base::as.data.frame\n#' @export\nas.data.frame.datawizard_tables <- function(\n  x,\n  row.names = NULL,\n  optional = FALSE,\n  ...,\n  stringsAsFactors = FALSE,\n  add_total = FALSE\n) {\n  # extract variables of frequencies\n  selected_vars <- unlist(lapply(x, function(i) attributes(i)$varname))\n  # coerce to data frame, remove rownames\n  data_frames <- lapply(x, function(i) {\n    # the `format()` methods for objects returned by `data_tabulate()` call\n    # `as.data.frame()` - we have to pay attention to avoid infinite iterations\n    # here. At the moment, this is no problem, as objects we have at this stage\n    # are of class \"datawizard_table\" or \"datawizard_crosstab\", while this\n    # `as.data.frame()` method is only called for \"datawizard_tables\" (the plural)\n    # form). Else, we would need to modify the class attribute here,\n    # e.g. class(i) <- \"data.frame\"\n    if (add_total) {\n      # to add the total column and row, we simply can call `format()`\n      out <- as.data.frame(format(i))\n      for (cols in 2:ncol(out)) {\n        # since \"format()\" returns a character matrix, we want to convert\n        # the columns to numeric. 
We have to exclude the first column, as the\n        # first column is character, due to the added \"Total\" value.\n        out[[cols]] <- as.numeric(out[[cols]])\n      }\n      # after formatting, we have a \"separator\" row for nicer printing.\n      # this should also be removed\n      out <- remove_empty_rows(out)\n    } else {\n      out <- as.data.frame(i)\n    }\n    rownames(out) <- NULL\n    out\n  })\n  # create nested data frame\n  result <- data.frame(\n    var = selected_vars,\n    table = I(data_frames),\n    stringsAsFactors = stringsAsFactors\n  )\n  # consider additional arguments\n  rownames(result) <- row.names\n  result\n}\n\n#' @export\nas.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables\n\n\n# as.table --------------------\n\n#' @rdname as.prop.table\n#' @export\nas.table.datawizard_table <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # sanity check - the `.data.frame` method (data_tabulate(mtcars, \"cyl\"))\n  # returns a list, but not the default method (data_tabulate(mtcars$cyl))\n  if (!is.data.frame(x)) {\n    x <- x[[1]]\n  }\n  # check if any table has NA values - the column \"Value\" contains the value\n  # \"NA\", and the column \"N\" contains the frequency of this value.\n  if (remove_na) {\n    # .check_table_na() works on lists of data frames, so we wrap the data frame\n    # into a list here\n    if (verbose && .check_table_na(list(x))) {\n      insight::format_alert(\"Removing NA values from frequency table.\")\n    }\n    # remove NA values from the table\n    x <- x[!is.na(x$Value), , drop = FALSE]\n  }\n  # coerce to table\n  result <- as.table(stats::setNames(x[[\"N\"]], x$Value))\n  # if we don't want to simplify the table, we wrap it into a list\n  if (!simplify) {\n    result <- list(result)\n  }\n\n  result\n}\n\n#' @export\nas.table.datawizard_tables <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # only 
show message once we set `verbose = FALSE` in the lapply()\n  if (remove_na && verbose && .check_table_na(x)) {\n    insight::format_alert(\"Removing NA values from frequency table.\")\n  }\n\n  out <- lapply(\n    x,\n    as.table.datawizard_table,\n    remove_na = remove_na,\n    # no nested lists\n    simplify = TRUE,\n    # no multiple messages\n    verbose = FALSE,\n    ...\n  )\n  # if only one table is returned, \"unlist\"\n  if (length(out) == 1 && simplify) {\n    out <- out[[1]]\n  }\n  out\n}\n\n#' @export\nas.table.datawizard_crosstab <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # sanity check - the `.data.frame` method  returns a list, but not the\n  # default method\n  if (!is.data.frame(x)) {\n    x <- x[[1]]\n  }\n  # check for grouped df - we need to remove the \"Group\" column\n  if (.is_grouped_df_xtab(x)) {\n    x$Group <- NULL\n  }\n  # first column contains the row names\n  row_names <- as.character(x[[1]])\n  row_names[is.na(row_names)] <- \"NA\"\n  # remove first column, set rownames\n  x[[1]] <- NULL\n  rownames(x) <- row_names\n\n  if (remove_na) {\n    if (verbose && .check_xtable_na(list(x))) {\n      insight::format_alert(\"Removing NA values from frequency table.\")\n    }\n    if (!is.null(x[[\"NA\"]])) {\n      x[[\"NA\"]] <- NULL\n    }\n    if (\"NA\" %in% row_names) {\n      x <- x[row_names != \"NA\", ]\n    }\n  }\n  # coerce to table\n  result <- as.table(as.matrix(x))\n  # if we don't want to simplify the table, we wrap it into a list\n  if (!simplify) {\n    result <- list(result)\n  }\n\n  result\n}\n\n#' @export\nas.table.datawizard_crosstabs <- function(\n  x,\n  remove_na = TRUE,\n  simplify = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # only show message once we set `verbose = FALSE` in the lapply()\n  if (remove_na && verbose && .check_xtable_na(x)) {\n    insight::format_alert(\"Removing NA values from frequency table.\")\n  }\n\n  out <- lapply(\n    x,\n    
as.table.datawizard_crosstab,\n    remove_na = remove_na,\n    simplify = TRUE,\n    verbose = FALSE,\n    ...\n  )\n  # if only one table is returned, \"unlist\"\n  if (length(out) == 1 && simplify) {\n    out <- out[[1]]\n  }\n  # if we have a grouped data frame, we save the grouping values as\n  # names for the list\n  if (.is_grouped_df_xtab(x)) {\n    names(out) <- unlist(\n      lapply(x, function(i) {\n        i$Group[1]\n      }),\n      use.names = FALSE\n    )\n  }\n  out\n}\n\n\n.is_grouped_df_xtab <- function(x) {\n  if (!is.data.frame(x)) {\n    x <- x[[1]]\n  }\n  isTRUE(attributes(x)$grouped_df)\n}\n\n\n.check_table_na <- function(x) {\n  # check if any table has NA values - the column \"Value\" contains the value\n  # \"NA\", and the column \"N\" contains the frequency of this value.\n  any(vapply(x, function(i) any(i$N[is.na(i$Value)] > 0), logical(1)))\n}\n\n\n.check_xtable_na <- function(x) {\n  any(vapply(\n    x,\n    function(i) {\n      # need to extract rownames, to check if we have a \"NA\" row\n      row_names <- as.character(i[[1]])\n      row_names[is.na(row_names)] <- \"NA\"\n      has_na <- FALSE\n      # check for NA columns and rows\n      if (!is.null(i[[\"NA\"]])) {\n        has_na <- any(i[[\"NA\"]] > 0)\n      }\n      if (\"NA\" %in% row_names) {\n        # for grouped data frames, we need to remove the \"Group\" column, else\n        # the indexing -1 below won't work\n        if (.is_grouped_df_xtab(i)) {\n          i$Group <- NULL\n        }\n        # we need \"as.data.frame()\" for grouped df, else `as.vector()` fails\n        has_na <- has_na |\n          any(as.vector(as.data.frame(i[row_names == \"NA\", -1])) > 0)\n      }\n      has_na\n    },\n    logical(1)\n  ))\n}\n\n\n# format --------------------\n\n#' @export\nformat.datawizard_table <- function(x, format = \"text\", big_mark = NULL, ...) 
{\n  # convert to character manually, else, for large numbers,\n  # format_table() returns scientific notation\n  x <- as.data.frame(x)\n  x$N <- as.character(x$N)\n\n  # format data frame\n  ftab <- insight::format_table(x, ...)\n  ftab[] <- lapply(ftab, function(i) {\n    i[i == \"\"] <- ifelse(identical(format, \"text\"), \"<NA>\", \"(NA)\") # nolint\n    i\n  })\n  ftab$N <- gsub(\"\\\\.00$\", \"\", ftab$N)\n\n  # insert big marks?\n  ftab$N <- .add_commas_in_numbers(ftab$N, big_mark)\n\n  ftab\n}\n\n.add_commas_in_numbers <- function(x, big_mark = NULL) {\n  # sanity checks - for crosstables with `remove_na = FALSE`, nchar(x) fails,\n  # and pretty() warns about non-numeric input. Thus, we skip if any NA value\n  # is in `x`.\n  if (anyNA(x)) {\n    return(x)\n  }\n  # automatically add a big mark for large numbers\n  if (is.null(big_mark) && any(nchar(x) > 5)) {\n    big_mark <- \",\"\n  }\n  if (identical(big_mark, \"\")) {\n    return(x)\n  }\n  if (!is.null(big_mark)) {\n    x <- prettyNum(x, big.mark = big_mark)\n  }\n\n  x\n}\n\n\n# print --------------------\n\n#' @rdname data_tabulate\n#' @export\nprint.datawizard_table <- function(x, big_mark = NULL, ...) {\n  a <- attributes(x)\n\n  # \"table\" header with variable label/name, and type\n  cat(.table_header(x, \"text\"))\n\n  # grouped data? 
if yes, add information on grouping factor\n  if (!is.null(a$group_variable)) {\n    group_title <- paste0(\n      \"Grouped by \",\n      toString(lapply(colnames(a$group_variable), function(i) {\n        sprintf(\"%s (%s)\", i, a$group_variable[[i]])\n      }))\n    )\n    cat(insight::print_color(group_title, \"blue\"))\n    cat(\"\\n\")\n  }\n\n  a$total_n <- .add_commas_in_numbers(a$total_n, big_mark)\n  a$valid_n <- .add_commas_in_numbers(a$valid_n, big_mark)\n\n  # summary of total and valid N (we may add mean/sd as well?)\n  summary_line <- sprintf(\n    \"# total N=%s valid N=%s%s\\n\\n\",\n    a$total_n,\n    a$valid_n,\n    ifelse(is.null(a$weights), \"\", \" (weighted)\")\n  )\n  cat(insight::print_color(summary_line, \"blue\"))\n\n  # remove information that goes into the header/footer\n  x$Variable <- NULL\n  x$Group <- NULL\n\n  # print table\n  cat(insight::export_table(\n    format(x, big_mark = big_mark, ...),\n    cross = \"+\",\n    missing = \"<NA>\",\n    ...\n  ))\n  invisible(x)\n}\n\n\n#' @export\nprint.datawizard_tables <- function(x, big_mark = NULL, ...) 
{\n  # check if we have weights\n  is_weighted <- isTRUE(attributes(x)$is_weighted)\n\n  a <- attributes(x)\n  if (!isTRUE(a$collapse) || length(x) == 1) {\n    for (i in seq_along(x)) {\n      print(x[[i]], big_mark = big_mark, ...)\n      if (i < length(x)) cat(\"\\n\")\n    }\n  } else {\n    x <- lapply(x, function(i) {\n      i_attr <- attributes(i)\n      i <- format(i, format = \"text\", big_mark = big_mark, ...)\n      if (!is.null(i$Variable)) {\n        i$Variable[i_attr$duplicate_varnames] <- \"\"\n      }\n      if (!is.null(i$Group)) {\n        i$Group[i_attr$duplicate_varnames] <- \"\"\n      }\n      i[nrow(i) + 1, ] <- \"\"\n      i\n    })\n\n    out <- do.call(rbind, x)\n    if (is_weighted) {\n      cat(insight::print_color(\"# Frequency Table (weighted)\\n\\n\", \"blue\"))\n    } else {\n      cat(insight::print_color(\"# Frequency Table\\n\\n\", \"blue\"))\n    }\n\n    # print table\n    cat(insight::export_table(\n      out,\n      missing = \"<NA>\",\n      cross = \"+\",\n      empty_line = \"-\",\n      ...\n    ))\n  }\n}\n\n\n# display --------------------\n\n#' @rdname data_tabulate\n#' @export\ndisplay.datawizard_table <- function(\n  object,\n  big_mark = NULL,\n  format = \"markdown\",\n  ...\n) {\n  format <- .display_default_format(format)\n\n  fun_args <- list(\n    x = object,\n    big_mark = big_mark,\n    ...\n  )\n\n  # print table in HTML or markdown format\n  if (format %in% c(\"html\", \"tt\")) {\n    fun_args$backend <- format\n    do.call(print_html, fun_args)\n  } else {\n    do.call(print_md, fun_args)\n  }\n}\n\n#' @export\ndisplay.datawizard_tables <- display.datawizard_table\n\n#' @export\ndisplay.datawizard_crosstab <- display.datawizard_table\n\n#' @export\ndisplay.datawizard_crosstabs <- display.datawizard_table\n\n.display_default_format <- function(format) {\n  format <- getOption(\"easystats_display_format\", format)\n  insight::validate_argument(format, c(\"markdown\", \"html\", \"md\", \"tt\"))\n}\n\n\n# 
print_html --------------------\n\n#' @export\nprint_html.datawizard_table <- function(x, big_mark = NULL, ...) {\n  .print_dw_table(x, format = \"html\", big_mark = big_mark, ...)\n}\n\n\n#' @export\nprint_html.datawizard_tables <- function(x, big_mark = NULL, ...) {\n  # check if we have weights\n  is_weighted <- isTRUE(attributes(x)$is_weighted)\n\n  if (length(x) == 1) {\n    print_html(x[[1]], big_mark = big_mark, ...)\n  } else {\n    x <- lapply(x, function(i) {\n      i_attr <- attributes(i)\n      i <- format(i, format = \"html\", big_mark = big_mark, ...)\n      if (!is.null(i$Variable)) {\n        i$Variable[i_attr$duplicate_varnames] <- \"\"\n      }\n      i\n    })\n\n    out <- do.call(rbind, x)\n\n    # print table\n    insight::export_table(\n      out,\n      missing = \"<NA>\",\n      caption = ifelse(\n        is_weighted,\n        \"Frequency Table (weighted)\",\n        \"Frequency Table\"\n      ),\n      format = .check_format_backend(...),\n      group_by = \"Group\"\n    )\n  }\n}\n\n\n# print_md --------------------\n\n#' @export\nprint_md.datawizard_table <- function(x, big_mark = NULL, ...) {\n  .print_dw_table(x, format = \"markdown\", big_mark = big_mark, ...)\n}\n\n\n#' @export\nprint_md.datawizard_tables <- function(x, big_mark = NULL, ...) 
{\n  # check if we have weights\n  is_weighted <- isTRUE(attributes(x)$is_weighted)\n\n  if (length(x) == 1) {\n    print_md(x[[1]], big_mark = big_mark, ...)\n  } else {\n    x <- lapply(x, function(i) {\n      i_attr <- attributes(i)\n      i <- format(i, format = \"markdown\", big_mark = big_mark, ...)\n      if (!is.null(i$Variable)) {\n        i$Variable[i_attr$duplicate_varnames] <- \"\"\n      }\n      if (!is.null(i$Group)) {\n        i$Group[i_attr$duplicate_varnames] <- \"\"\n      }\n      i[nrow(i) + 1, ] <- \"\"\n      i\n    })\n\n    out <- do.call(rbind, x)\n\n    # print table\n    insight::export_table(\n      out,\n      missing = \"(NA)\",\n      empty_line = \"-\",\n      format = \"markdown\",\n      title = ifelse(\n        is_weighted,\n        \"Frequency Table (weighted)\",\n        \"Frequency Table\"\n      )\n    )\n  }\n}\n\n\n# tools --------------------\n\n.print_dw_table <- function(x, format = \"markdown\", big_mark = NULL, ...) {\n  a <- attributes(x)\n\n  # \"table\" header with variable label/name, and type\n  caption <- .table_header(x, format)\n\n  # summary of total and valid N (we may add mean/sd as well?)\n  footer <- sprintf(\n    \"total N=%i valid N=%i%s%s\",\n    a$total_n,\n    a$valid_n,\n    ifelse(is.null(a$weights), \"\", \" (weighted)\"),\n    ifelse(format == \"markdown\", \"\\n\\n\", \"\")\n  )\n\n  # remove information that goes into the header/footer\n  x$Variable <- NULL\n  x$Group <- NULL\n\n  # this function is used by all four supported format, markdown, text, html\n  # and tt (tinytable). 
For tt, we sometimes have format \"html\" and backend = \"tt\",\n  # so we need to check for this special case\n  backend <- switch(format, html = , tt = .check_format_backend(...), format)\n  # print table\n  insight::export_table(\n    format(x, format = format, big_mark = big_mark, ...),\n    title = caption,\n    footer = footer,\n    missing = \"(NA)\",\n    format = backend\n  )\n}\n\n\n# we allow exporting HTML format based on \"gt\" or \"tinytable\"\n.check_format_backend <- function(...) {\n  dots <- list(...)\n  if (identical(dots$backend, \"tt\")) {\n    \"tt\"\n  } else {\n    \"html\"\n  }\n}\n\n\n.table_header <- function(x, format = \"text\") {\n  a <- attributes(x)\n\n  # assemble name, based on what information is available\n  name <- NULL\n  # fix object name\n  if (identical(a$object, \"x[[i]]\")) {\n    a$object <- NULL\n  }\n  if (!is.null(a$label)) {\n    name <- a$label\n    if (!is.null(a$varname)) {\n      name <- paste0(name, \" (\", a$varname, \")\")\n    } else if (!is.null(a$object)) {\n      name <- paste0(name, \" (\", a$object, \")\")\n    }\n  } else if (!is.null(a$varname)) {\n    name <- a$varname\n    if (!is.null(a$object)) {\n      name <- paste0(name, \" (\", a$object, \")\")\n    }\n  }\n\n  if (is.null(name) && !is.null(a$object)) {\n    name <- a$object\n  }\n\n  # \"table\" header with variable label/name, and type\n  if (identical(format, \"text\")) {\n    out <- paste(\n      insight::color_text(name, \"red\"),\n      insight::color_text(sprintf(\"<%s>\\n\", a$type), \"blue\")\n    )\n  } else {\n    out <- paste0(name, \" (\", a$type, \")\")\n  }\n\n  out\n}\n\n\n.variable_type <- function(x) {\n  if (is.ordered(x)) {\n    vt <- \"ord\"\n  } else if (is.factor(x)) {\n    vt <- \"fct\"\n  } else if (class(x)[1] == \"Date\") {\n    vt <- \"date\"\n  } else {\n    vt <- switch(\n      typeof(x),\n      logical = \"lgl\",\n      integer = \"int\",\n      double = \"dbl\",\n      character = \"chr\",\n      complex = 
\"cpl\",\n      closure = \"fn\",\n      environment = \"env\",\n      typeof(x)\n    )\n  }\n\n  switch(\n    vt,\n    ord = \"ordinal\",\n    fct = \"categorical\",\n    dbl = \"numeric\",\n    int = \"integer\",\n    chr = \"character\",\n    lbl = \"labelled\",\n    cpl = \"complex\",\n    lgl = \"logical\",\n    vt\n  )\n}\n"
  },
  {
    "path": "R/data_to_long.R",
    "content": "#' @title Reshape (pivot) data from wide to long\n#' @name data_to_long\n#'\n#' @description\n#' This function \"lengthens\" data, increasing the number of rows and decreasing\n#' the number of columns. This is a dependency-free base-R equivalent of\n#' `tidyr::pivot_longer()`.\n#'\n#' @param data A data frame to convert to long format, so that it has more\n#' rows and fewer columns after the operation.\n#' @param names_to The name of the new column (variable) that will contain the\n#' _names_ from columns in `select` as values, to identify the source of the\n#' values. `names_to` can be a character vector with more than one column name,\n#' in which case `names_sep` or `names_pattern` must be provided in order to\n#' identify which parts of the column names go into newly created columns.\n#' See also 'Examples'.\n#' @param names_prefix A regular expression used to remove matching text from\n#' the start of each variable name.\n#' @param names_sep,names_pattern If `names_to` contains multiple values, this\n#' argument controls how the column name is broken up. `names_pattern` takes a\n#' regular expression containing matching groups, i.e. \"()\".\n#' @param values_to The name of the new column that will contain the _values_ of\n#' the columns in `select`.\n#' @param values_drop_na If `TRUE`, will drop rows that contain only `NA` in the\n#' `values_to` column. This effectively converts explicit missing values to\n#' implicit missing values, and should generally be used only when missing values\n#' in data were created by its structure.\n#' @param rows_to The name of the column that will contain the row names or row\n#' numbers from the original data. If `NULL`, will be removed.\n#' @param ... Currently not used.\n#' @inheritParams extract_column_names\n#' @param cols Identical to `select`. This argument is here to ensure compatibility\n#' with `tidyr::pivot_longer()`. 
If both `select` and `cols` are provided, `cols`\n#' is used.\n#'\n#' @inherit data_rename seealso\n#'\n#' @details\n#' Reshaping data into long format usually means that the input data frame is\n#' in _wide_ format, where multiple measurements taken on the same subject are\n#' stored in multiple columns (variables). The long format stores the same\n#' information in a single column, with each measurement per subject stored in\n#' a separate row. The values of all variables that are not in `select` will\n#' be repeated.\n#'\n#' The necessary information for `data_to_long()` is:\n#'\n#' - The columns that contain the repeated measurements (`select`).\n#' - The name of the newly created column that will contain the names of the\n#'   columns in `select` (`names_to`), to identify the source of the values.\n#'   `names_to` can also be a character vector with more than one column name,\n#'   in which case `names_sep` or `names_pattern` must be provided to specify\n#'   which parts of the column names go into the newly created columns.\n#' - The name of the newly created column that contains the values of the\n#'   columns in `select` (`values_to`).\n#'\n#' In other words: repeated measurements that are spread across several columns\n#' will be gathered into a single column (`values_to`), with the original column\n#' names, that identify the source of the gathered values, stored in one or more\n#' new columns (`names_to`).\n#'\n#' @return If a tibble was provided as input, `reshape_longer()` also returns a\n#' tibble. 
Otherwise, it returns a data frame.\n#'\n#' @examplesIf all(insight::check_if_installed(c(\"psych\", \"tidyr\"), quietly = TRUE))\n#' wide_data <- setNames(\n#'   data.frame(replicate(2, rnorm(8))),\n#'   c(\"Time1\", \"Time2\")\n#' )\n#' wide_data$ID <- 1:8\n#' wide_data\n#'\n#' # Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:3))\n#' # probably doesn't make much sense to mix \"time\" and \"id\"\n#' data_to_long(wide_data)\n#'\n#' # Customizing the names\n#' data_to_long(\n#'   wide_data,\n#'   select = c(\"Time1\", \"Time2\"),\n#'   names_to = \"Timepoint\",\n#'   values_to = \"Score\"\n#' )\n#'\n#' # Reshape multiple columns into long format.\n#' mydat <- data.frame(\n#'   age = c(20, 30, 40),\n#'   sex = c(\"Female\", \"Male\", \"Male\"),\n#'   score_t1 = c(30, 35, 32),\n#'   score_t2 = c(33, 34, 37),\n#'   score_t3 = c(36, 35, 38),\n#'   speed_t1 = c(2, 3, 1),\n#'   speed_t2 = c(3, 4, 5),\n#'   speed_t3 = c(1, 8, 6)\n#' )\n#' # The column names are split into two columns: \"type\" and \"time\". The\n#' # pattern for splitting column names is provided in `names_pattern`. 
Values\n#' # of all \"score_*\" and \"speed_*\" columns are gathered into a single column\n#' # named \"count\".\n#' data_to_long(\n#'   mydat,\n#'   select = 3:8,\n#'   names_to = c(\"type\", \"time\"),\n#'   names_pattern = \"(score|speed)_t(\\\\d+)\",\n#'   values_to = \"count\"\n#' )\n#'\n#' # Full example\n#' # ------------------\n#' data <- psych::bfi # Wide format with one row per participant's personality test\n#'\n#' # Pivot long format\n#' very_long_data <- data_to_long(data,\n#'   select = regex(\"\\\\d\"), # Select all columns that contain a digit\n#'   names_to = \"Item\",\n#'   values_to = \"Score\",\n#'   rows_to = \"Participant\"\n#' )\n#' head(very_long_data)\n#'\n#' even_longer_data <- data_to_long(\n#'   tidyr::who,\n#'   select = new_sp_m014:newrel_f65,\n#'   names_to = c(\"diagnosis\", \"gender\", \"age\"),\n#'   names_pattern = \"new_?(.*)_(.)(.*)\",\n#'   values_to = \"count\"\n#' )\n#' head(even_longer_data)\n#' @export\ndata_to_long <- function(\n  data,\n  select = \"all\",\n  names_to = \"name\",\n  names_prefix = NULL,\n  names_sep = NULL,\n  names_pattern = NULL,\n  values_to = \"value\",\n  values_drop_na = FALSE,\n  rows_to = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  ...,\n  cols\n) {\n  # nolint\n  original_data <- data\n\n  # Prefer \"cols\" over \"select\" for compat with tidyr::pivot_longer\n  # nolint start\n  if (!missing(cols)) {\n    select <- substitute(cols)\n    cols <- .select_nse(\n      select,\n      data,\n      exclude = NULL,\n      ignore_case = ignore_case,\n      regex = regex,\n      ifnotfound = \"error\"\n    )\n  } else if (!missing(select) || !is.null(select)) {\n    cols <- .select_nse(\n      select,\n      data,\n      exclude = NULL,\n      ignore_case = ignore_case,\n      regex = regex,\n      ifnotfound = \"error\"\n    )\n  } else {\n    insight::format_error(\n      \"You need to specify columns to pivot, either with `select` or `cols`.\"\n    )\n  }\n  # nolint end\n\n  if (length(names_to) 
> 1L && is.null(names_sep) && is.null(names_pattern)) {\n    insight::format_error(\n      \"If you supply multiple names in `names_to`, you must also supply one of `names_sep` or `names_pattern`.\"\n    )\n  }\n\n  if (length(names_to) == 1L) {\n    if (!is.null(names_sep)) {\n      insight::format_error(\n        \"You can't use `names_sep` when `names_to` is of length 1.\"\n      )\n    }\n    if (!is.null(names_pattern)) {\n      insight::format_error(\n        \"You can't use `names_pattern` when `names_to` is of length 1.\"\n      )\n    }\n  }\n\n  # save custom attributes\n  custom_attr <- attributes(data)\n\n  # Remove tidyverse attributes, will add them back at the end\n  if (inherits(data, \"tbl_df\")) {\n    tbl_input <- TRUE\n    data <- as.data.frame(data, stringsAsFactors = FALSE)\n  } else {\n    tbl_input <- FALSE\n  }\n\n  if (any(names_to %in% setdiff(names(data), cols))) {\n    insight::format_error(\n      \"Some values of the columns specified in `names_to` are already present as column names.\",\n      paste0(\n        \"Either use another value in `names_to` or rename the following columns: \",\n        text_concatenate(names_to[which(\n          names_to %in% setdiff(names(data), cols)\n        )])\n      )\n    )\n  }\n\n  not_selected <- setdiff(names(data), cols)\n\n  # create a temp id so that we know how to rearrange the rows once the data is\n  # stacked\n  not_stacked <- data[, not_selected, drop = FALSE]\n  not_stacked[[\"_Rows\"]] <- coerce_to_numeric(row.names(data))\n\n  # stack the selected columns\n  stacked_data <- .stack(data[, cols, drop = FALSE])[, 2:1]\n\n  # reorder the rows to have a repeated sequence when all vars are selected to\n  # pivot\n  #\n  # See with following example:\n  # wide_data <- data.frame(replicate(5, rnorm(10)))\n  # data_to_long(wide_data)\n\n  needs_to_rearrange <- length(not_selected) == 0L && is.null(rows_to)\n  if (isTRUE(needs_to_rearrange)) {\n    # 
https://stackoverflow.com/questions/73984957/efficient-way-to-reorder-rows-to-have-a-repeated-sequence\n    stacked_data <- stacked_data[\n      matrix(\n        seq_len(nrow(stacked_data)),\n        nrow = length(unique(stacked_data$ind)),\n        byrow = TRUE\n      ),\n    ]\n\n    row.names(stacked_data) <- NULL\n  }\n\n  stacked_data <- data_rename(stacked_data, \"values\", values_to)\n\n  # split columns if several names in names_to or names_pattern is specified\n  if (length(names_to) > 1L) {\n    if (is.null(names_pattern)) {\n      # faster than strsplit\n      tmp <- utils::read.csv(\n        text = stacked_data$ind,\n        sep = names_sep,\n        stringsAsFactors = FALSE,\n        header = FALSE\n      )\n      names(tmp) <- paste0(\"V\", seq_len(ncol(tmp)))\n      tmp[tmp == \"\"] <- NA # nolint\n\n      stacked_data$ind <- NULL\n      stacked_data <- cbind(tmp, stacked_data)\n    } else {\n      tmp <- regmatches(\n        unique(stacked_data$ind),\n        regexec(names_pattern, unique(stacked_data$ind))\n      )\n      tmp <- as.data.frame(do.call(rbind, tmp), stringsAsFactors = FALSE)\n      names(tmp) <- c(\"ind\", names_to)\n      # cbind + match is faster than merge\n      # cbind doesn't remove identical columns so we need to manually remove \"ind\"\n      # which is in both datasets\n      stacked_data <- cbind(\n        stacked_data,\n        tmp[match(stacked_data[[\"ind\"]], tmp[[\"ind\"]]), -1]\n      )\n      stacked_data$ind <- NULL\n    }\n  }\n\n  stacked_data <- data_relocate(stacked_data, select = values_to, after = -1)\n\n  # if columns in data frame have attributes (e.g. labelled data), `cbind()`\n  # won't work, so we need to remove them. 
We'll set them back later\n  not_stacked[] <- lapply(not_stacked, function(i) {\n    # we can't remove *all* attributes, this will convert factors into integers\n    attr(i, \"label\") <- NULL\n    attr(i, \"labels\") <- NULL\n    attr(i, \"format.spss\") <- NULL\n    class(i) <- setdiff(class(i), c(\"haven_labelled\", \"vctrs_vctr\"))\n    i\n  })\n\n  # reunite unselected data with stacked data\n  out <- cbind(\n    not_stacked,\n    stats::setNames(stacked_data, c(names_to, values_to)),\n    row.names = NULL\n  )\n\n  if (!is.null(names_prefix)) {\n    if (length(names_to) > 1L) {\n      insight::format_error(\n        \"`names_prefix` only works when `names_to` is of length 1.\"\n      )\n    }\n    out[[names_to]] <- gsub(paste0(\"^\", names_prefix), \"\", out[[names_to]])\n  }\n\n  # rearrange the rows with the temp id\n  if (length(not_selected) > 0L) {\n    out <- data_arrange(out, \"_Rows\")\n  }\n\n  # Remove or rename the row index\n  if (is.null(rows_to)) {\n    out[[\"_Rows\"]] <- NULL\n  } else {\n    out <- data_rename(out, \"_Rows\", rows_to)\n  }\n\n  if (values_drop_na) {\n    out <- out[!is.na(out[, values_to]), , drop = FALSE]\n  }\n\n  # add back attributes\n  out <- .replace_attrs(out, custom_attr)\n\n  # add back tidyverse attributes\n  if (isTRUE(tbl_input)) {\n    class(out) <- c(\"tbl_df\", \"tbl\", \"data.frame\")\n  }\n\n  # reset row names\n  if (!insight::object_has_rownames(data)) {\n    row.names(out) <- NULL\n  }\n\n  # set back labels\n  shared_columns <- intersect(colnames(out), colnames(original_data))\n  for (i in shared_columns) {\n    out[[i]] <- .set_back_labels(\n      out[[i]],\n      original_data[[i]],\n      include_values = TRUE\n    )\n  }\n\n  out\n}\n\n\n#' Code adapted from utils::stack (but largely modified)\n#'\n#' @noRd\n\n.stack <- function(x) {\n  ind <- rep(names(x), times = lengths(x))\n  # use do.call(\"c\", ...) 
instead of unlist to preserve the date format (but a\n  # bit slower)\n  # can't use do.call(\"c\", ...) all the time because its behavior changed with\n  # factors in 4.1.0\n  values_are_dates <- all(\n    vapply(x, .is_date, FUN.VALUE = logical(1L))\n  )\n  if (values_are_dates) {\n    data.frame(values = do.call(\"c\", unname(x)), ind, stringsAsFactors = FALSE)\n  } else {\n    data.frame(\n      values = unlist(x, use.names = FALSE),\n      ind,\n      stringsAsFactors = FALSE\n    )\n  }\n}\n\n#' @rdname data_to_long\n#' @export\nreshape_longer <- data_to_long\n"
  },
  {
    "path": "R/data_to_wide.R",
    "content": "#' Reshape (pivot) data from long to wide\n#'\n#' This function \"widens\" data, increasing the number of columns and decreasing\n#' the number of rows. This is a dependency-free base-R equivalent of\n#' `tidyr::pivot_wider()`.\n#'\n#' @param data A data frame to convert to wide format, so that it has more\n#' columns and fewer rows post-widening than pre-widening.\n#' @param id_cols The name of the column that identifies the rows in the data\n#' by which observations are grouped and the gathered data is spread into new\n#' columns. Usually, this is a variable containing an ID for observations that\n#' have been repeatedly measured. If `NULL`, it will use all remaining columns\n#' that are not in `names_from` or `values_from` as ID columns. `id_cols` can\n#' also be a character vector with more than one name of identifier columns. See\n#' also 'Details' and 'Examples'.\n#' @param names_from The name of the column in the original data whose values\n#' will be used for naming the new columns created in the widened data. Each\n#' unique value in this column will become the name of one of these new columns.\n#' In case `names_prefix` is provided, column names will be concatenated with\n#' the string given in `names_prefix`. If `values_from` specifies more than one\n#' variable that should be widened, the new column names are a combination of\n#' the old column names in `values_from` and the *values* from `names_from`, to\n#' avoid duplicate column names.\n#' @param names_prefix String added to the start of every variable name. 
This is\n#'  particularly useful if `names_from` is a numeric vector and you want to create\n#'  syntactic variable names.\n#' @param names_sep If `names_from` or `values_from` contains multiple variables,\n#' this will be used to join their values together into a single string to use\n#' as a column name.\n#' @param names_glue Instead of `names_sep` and `names_prefix`, you can supply a\n#' [glue specification](https://glue.tidyverse.org/index.html) that uses the\n#' `names_from` columns to create custom column names. Note that the only\n#' delimiters supported by `names_glue` are curly brackets, `{` and `}`.\n#' @param values_from The name of the columns in the original data that contains\n#' the values used to fill the new columns created in the widened data. Can also\n#' be one of the selection helpers (see argument `select` in [`data_select()`]).\n#' @param values_fill Defunct argument, which has no function anymore. Will be\n#' removed in future versions.\n#' @param verbose Toggle warnings.\n#' @param ... Not used for now.\n#' @inheritParams data_select\n#'\n#' @return If a tibble was provided as input, `data_to_wide()` also returns a\n#' tibble. Otherwise, it returns a data frame.\n#'\n#' @details\n#' Reshaping data into wide format usually means that the input data frame is\n#' in _long_ format, where multiple measurements taken on the same subject are\n#' stored in multiple rows. The wide format stores the same information in a\n#' single row, with each measurement stored in a separate column. Thus, the\n#' necessary information for `data_to_wide()` is:\n#'\n#' - The name of the column(s) that identify the groups or repeated measurements\n#'   (`id_cols`).\n#' - The name of the column whose _values_ will become the new column names\n#'   (`names_from`). 
Since these values may not necessarily reflect appropriate\n#'   column names, you can use `names_prefix` to add a prefix to each newly\n#'   created column name.\n#' - The name of the column(s) that contain the values (`values_from`) for the\n#'   new columns that are created by `names_from`.\n#'\n#' In other words: repeated measurements, as indicated by `id_cols`, that are\n#' saved into the column `values_from` will be spread into new columns, which\n#' will be named after the values in `names_from`. See also 'Examples'.\n#'\n#' @examplesIf requireNamespace(\"lme4\", quietly = TRUE)\n#' data_long <- read.table(header = TRUE, text = \"\n#'  subject sex condition measurement\n#'        1   M   control         7.9\n#'        1   M     cond1        12.3\n#'        1   M     cond2        10.7\n#'        2   F   control         6.3\n#'        2   F     cond1        10.6\n#'        2   F     cond2        11.1\n#'        3   F   control         9.5\n#'        3   F     cond1        13.1\n#'        3   F     cond2        13.8\n#'        4   M   control        11.5\n#'        4   M     cond1        13.4\n#'        4   M     cond2        12.9\")\n#'\n#' # converting long data into wide format\n#' data_to_wide(\n#'   data_long,\n#'   id_cols = \"subject\",\n#'   names_from = \"condition\",\n#'   values_from = \"measurement\"\n#' )\n#'\n#' # converting long data into wide format with custom column names\n#' data_to_wide(\n#'   data_long,\n#'   id_cols = \"subject\",\n#'   names_from = \"condition\",\n#'   values_from = \"measurement\",\n#'   names_prefix = \"Var.\",\n#'   names_sep = \".\"\n#' )\n#'\n#' # converting long data into wide format, combining multiple columns\n#' production <- expand.grid(\n#'   product = c(\"A\", \"B\"),\n#'   country = c(\"AI\", \"EI\"),\n#'   year = 2000:2014\n#' )\n#' production <- data_filter(production, (product == \"A\" & country == \"AI\") | product == \"B\")\n#' production$production <- rnorm(nrow(production))\n#'\n#' data_to_wide(\n#'   
production,\n#'   names_from = c(\"product\", \"country\"),\n#'   values_from = \"production\",\n#'   names_glue = \"prod_{product}_{country}\"\n#' )\n#'\n#' # reshaping multiple long columns into wide format. to avoid duplicate\n#' # column names, new names are a combination of the old column names in\n#' # `values_from` and the values from `names_from`\n#' data_long <- read.table(header = TRUE, text = \"\n#' subject_id time score anxiety test\n#'          1    1    10       5   NA\n#'          1    2    NA       7   NA\n#'          2    1    15       6   NA\n#'          2    2    12      NA   NA\n#'          3    1    18       8   NA\n#'          5    2    11       4   NA\n#'          4    1    NA       5   NA\n#'          4    2    14      NA   NA\")\n#'\n#' data_to_wide(\n#'   data_long,\n#'   id_cols = \"subject_id\",\n#'   names_from = \"time\",\n#'   values_from = c(\"score\", \"anxiety\", \"test\")\n#' )\n#'\n#' # using the \"sleepstudy\" dataset\n#' data(sleepstudy, package = \"lme4\")\n#'\n#' # the sleepstudy data contains repeated measurements of average reaction\n#' # times for each subjects over multiple days, in a sleep deprivation study.\n#' # It is in long-format, i.e. each row corresponds to a single measurement.\n#' # The variable \"Days\" contains the timepoint of the measurement, and\n#' # \"Reaction\" contains the measurement itself. 
Converting this data to wide\n#' # format will create a new column for each day, with the reaction time as the\n#' # value.\n#' head(sleepstudy)\n#'\n#' data_to_wide(\n#'   sleepstudy,\n#'   id_cols = \"Subject\",\n#'   names_from = \"Days\",\n#'   values_from = \"Reaction\"\n#' )\n#'\n#' # clearer column names\n#' data_to_wide(\n#'   sleepstudy,\n#'   id_cols = \"Subject\",\n#'   names_from = \"Days\",\n#'   values_from = \"Reaction\",\n#'   names_prefix = \"Reaction_Day_\"\n#' )\n#'\n#' # For unequal group sizes, missing information is filled with NA\n#' d <- subset(sleepstudy, Days %in% c(0, 1, 2, 3, 4))[c(1:9, 11:13, 16:17, 21), ]\n#'\n#' # long format, different number of \"Subjects\"\n#' d\n#'\n#' data_to_wide(\n#'   d,\n#'   id_cols = \"Subject\",\n#'   names_from = \"Days\",\n#'   values_from = \"Reaction\",\n#'   names_prefix = \"Reaction_Day_\"\n#' )\n#' @inherit data_rename seealso\n#' @export\ndata_to_wide <- function(\n  data,\n  id_cols = NULL,\n  values_from = \"Value\",\n  names_from = \"Name\",\n  names_sep = \"_\",\n  names_prefix = \"\",\n  names_glue = NULL,\n  values_fill = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  ## TODO: remove in a future update (#645)\n  if (!is.null(values_fill)) {\n    insight::format_warning(\n      \"`values_fill` is defunct and has no function anymore. 
It will be removed in future versions.\",\n      \"To handle missing values after widening, use `convert_na_to()` instead.\"\n    )\n  }\n\n  if (is.null(names_from) || !all(names_from %in% colnames(data))) {\n    insight::format_error(\n      \"`names_from` must be the name of an existing column in `data`.\"\n    )\n  }\n\n  select <- substitute(values_from)\n  values_from <- .select_nse(\n    select,\n    data,\n    exclude = NULL,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  if (is.null(values_from) || !length(values_from)) {\n    insight::format_error(\n      \"No variable defined in `values_from` was found in the `data`.\"\n    )\n  }\n\n  if (is.null(id_cols)) {\n    id_cols <- setdiff(colnames(data), c(names_from, values_from))\n  } else if (!all(id_cols %in% colnames(data))) {\n    insight::format_error(\n      \"`id_cols` must be the names of existing columns in `data`.\"\n    )\n  }\n\n  # save custom attributes\n  custom_attr <- attributes(data)\n\n  current_colnames <- names(data)\n\n  # Preserve attributes\n  if (inherits(data, \"tbl_df\")) {\n    tbl_input <- TRUE\n    data <- as.data.frame(data, stringsAsFactors = FALSE)\n  } else {\n    tbl_input <- FALSE\n  }\n\n  variable_attr <- lapply(data, attributes)\n\n  not_unstacked <- data[, id_cols, drop = FALSE]\n  not_unstacked <- unique(not_unstacked)\n\n  # unstack doesn't create NAs for combinations that don't exist (contrary to\n  # reshape), so we need to complete the dataset before unstacking.\n\n  new_data <- data\n\n  # create an id with all variables that are not in names_from or values_from\n  # so that we can create missing combinations between this id and names_from\n  if (length(id_cols) > 1L) {\n    new_data$temporary_id <- do.call(\n      paste,\n      c(new_data[, id_cols, drop = FALSE], sep = \"_\")\n    )\n  } else if (length(id_cols) == 1L) {\n    new_data$temporary_id <- new_data[[id_cols]]\n  } else {\n    new_data$temporary_id <- seq_len(nrow(new_data))\n  
}\n\n  # check that all_groups have all possible values for names_from\n  # If not, need to complete the dataset with NA for values_from where names_from\n  # didn't exist\n  n_rows_per_group <- table(new_data$temporary_id)\n  n_values_per_group <- insight::n_unique(n_rows_per_group)\n\n  not_all_cols_are_selected <- length(id_cols) > 0L\n\n  incomplete_groups <-\n    (n_values_per_group > 1L &&\n      !all(\n        unique(n_rows_per_group) %in% insight::n_unique(new_data[, names_from])\n      )) ||\n    (n_values_per_group == 1L &&\n      unique(n_rows_per_group) < length(unique(new_data[, names_from])))\n\n  # create missing combinations\n\n  if (not_all_cols_are_selected && incomplete_groups) {\n    expanded <- expand.grid(\n      unique(new_data[[\"temporary_id\"]]),\n      unique(new_data[[names_from]])\n    )\n    names(expanded) <- c(\"temporary_id\", names_from)\n    new_data <- data_merge(\n      new_data,\n      expanded,\n      join = \"full\",\n      by = c(\"temporary_id\", names_from),\n      sort = FALSE\n    )\n\n    # need to make a second temporary id to keep arrange values *without*\n    # rearranging the whole dataset\n    # Ex:\n    # \"B\"   1\n    # \"A\"   3\n    # \"A\"   NA\n    # \"B\"   NA\n    #\n    # must be rearranged as \"B\" \"B\" \"A\" \"A\" and not \"A\" \"A\" \"B\" \"B\"\n    lookup <- data.frame(\n      temporary_id = unique(\n        new_data[!is.na(new_data[values_from]), \"temporary_id\"]\n      )\n    )\n    lookup$temporary_id_2 <- seq_len(nrow(lookup))\n    new_data <- data_merge(\n      new_data,\n      lookup,\n      by = \"temporary_id\",\n      join = \"left\"\n    )\n\n    # creation of missing combinations was done with a temporary id, so need\n    # to fill columns that are not selected in names_from or values_from\n    new_data[, id_cols] <- lapply(id_cols, function(x) {\n      data <- data_arrange(new_data, c(\"temporary_id_2\", x))\n      ind <- which(!is.na(data[[x]]))\n      rep_times <- diff(c(ind, 
length(data[[x]]) + 1))\n      rep(data[[x]][ind], times = rep_times)\n    })\n\n    new_data <- data_arrange(new_data, \"temporary_id_2\")\n  }\n\n  # don't need temporary ids anymore\n  new_data$temporary_id <- NULL\n  new_data$temporary_id_2 <- NULL\n\n  # convert to wide format (returns the data and the order in which columns\n  # should be ordered)\n  unstacked <- .unstack(\n    new_data,\n    names_from,\n    values_from,\n    names_sep,\n    names_prefix,\n    names_glue\n  )\n\n  out <- unstacked$out\n\n  if (length(values_from) > 1L) {\n    unstacked$col_order <- unique(data[, names_from])\n    unstacked$col_order <- as.vector(\n      t(outer(values_from, unstacked$col_order, paste, sep = names_sep))\n    )\n  }\n\n  # stop if some column names would be duplicated (follow tidyr workflow)\n  if (any(unstacked$col_order %in% current_colnames)) {\n    insight::format_error(\n      \"Some values of the columns specified in `names_from` are already present as column names.\",\n      paste0(\n        \"Either use `names_prefix` or rename the following columns: \",\n        text_concatenate(current_colnames[which(\n          current_colnames %in% unstacked$col_order\n        )])\n      )\n    )\n  }\n\n  # reorder columns\n  out <- out[, unstacked$col_order, drop = FALSE]\n\n  # need to add the wide data to the original data\n  if (!insight::is_empty_object(not_unstacked)) {\n    out <- cbind(not_unstacked, out)\n  }\n  row.names(out) <- NULL\n\n  # add back attributes where possible\n  for (i in colnames(out)) {\n    attributes(out[[i]]) <- variable_attr[[i]]\n  }\n\n  # convert back to date if original values were dates\n  values_are_dates <- all(\n    vapply(data[, values_from, drop = FALSE], .is_date, FUN.VALUE = logical(1L))\n  )\n  if (values_are_dates) {\n    for (i in unstacked$col_order) {\n      out[[i]] <- as.Date.numeric(out[[i]], origin = \"1970-01-01\")\n    }\n  }\n\n  # add back attributes\n  out <- .replace_attrs(out, custom_attr)\n\n  if 
#' Adapted from `utils::unstack` (but largely modified)
#'
#' Converts an (already completed) long-format data frame into wide format.
#' Returns a list with the widened data (`out`) and the intended column order
#' (`col_order`), because `data.frame()` alone does not guarantee the order in
#' which the new columns should finally appear.
#'
#' @param x A data frame in long format.
#' @param names_from Name(s) of the column(s) whose values become the new
#'   column names.
#' @param values_from Name(s) of the column(s) holding the cell values.
#' @param names_sep Separator used when several `names_from` (or
#'   `values_from`) names are combined into a single column name.
#' @param names_prefix Prefix prepended to every new column name.
#' @param names_glue Optional glue-style template (e.g. `"{a}_{b}"`) used to
#'   build the new column names instead of pasting with `names_sep`.
#'
#' @noRd

.unstack <- function(
  x,
  names_from,
  values_from,
  names_sep,
  names_prefix,
  names_glue = NULL
) {
  # get values from names_from (future colnames)

  if (is.null(names_glue)) {
    # paste all names_from columns together, row-wise
    x$future_colnames <- do.call(
      paste,
      c(x[, names_from, drop = FALSE], sep = names_sep)
    )
  } else {
    # extract the variable names referenced inside {braces} of the template;
    # \{\K ... (?=\}) keeps only the text between the braces (PCRE)
    vars <- regmatches(
      names_glue,
      gregexpr("\\{\\K[^{}]+(?=\\})", names_glue, perl = TRUE)
    )[[1]]
    tmp_data <- x[, vars]
    # .gluestick() is a package-internal helper - presumably interpolates the
    # {var} references from `src`; defined elsewhere in the package
    x$future_colnames <- .gluestick(names_glue, src = tmp_data)
  }

  x$future_colnames <- paste0(names_prefix, x$future_colnames)

  # expand the values for each variable in "values_from": tapply() splits the
  # values by future column name, so each list element becomes one wide column
  res <- list()
  for (i in seq_along(values_from)) {
    res[[i]] <- tapply(x[[values_from[i]]], x$future_colnames, as.vector)
    if (length(values_from) > 1L) {
      # several value columns: disambiguate names as "<value><sep><name>"
      names(res[[i]]) <- paste0(values_from[i], names_sep, names(res[[i]]))
    }
  }

  # if there's a single variable in "values_from" and this variable only has
  # one value per group, tapply() returned an atomic vector (one value per
  # future column), so we need to make it a one-row dataframe

  if (length(res) == 1L && !is.list(res[[1]])) {
    res <- data.frame(
      matrix(
        res[[1]],
        nrow = 1,
        dimnames = list(NULL, names(res[[1]]))
      ),
      stringsAsFactors = FALSE,
      check.names = FALSE
    )
  } else {
    # flatten the per-variable lists into one list of columns
    res <- unlist(res, recursive = FALSE)
  }

  # return the wide data and the order in which the new columns should be
  # ordered

  list(
    out = data.frame(res, stringsAsFactors = FALSE, check.names = FALSE),
    col_order = unique(x$future_colnames)
  )
}
  },
  {
    "path": "R/data_unique.R",
    "content": "#' @title Keep only one row from all with duplicated IDs\n#'\n#' @description From all rows with at least one duplicated ID,\n#' keep only one. Methods for selecting the duplicated row are\n#' either the first duplicate, the last duplicate, or the \"best\"\n#' duplicate (default), based on the duplicate with the smallest\n#' number of `NA`. In case of ties, it picks the first\n#' duplicate, as it is the one most likely to be valid and\n#' authentic, given practice effects.\n#'\n#' Contrarily to `dplyr::distinct()`, `data_unique()` keeps all columns.\n#'\n#' @param keep The method to be used for duplicate selection, either \"best\"\n#'   (the default), \"first\", or \"last\".\n#' @inheritParams extract_column_names\n#'\n#' @return A data frame, containing only the chosen duplicates.\n#' @seealso [data_duplicated()]\n#' @examples\n#' df1 <- data.frame(\n#'   id = c(1, 2, 3, 1, 3),\n#'   item1 = c(NA, 1, 1, 2, 3),\n#'   item2 = c(NA, 1, 1, 2, 3),\n#'   item3 = c(NA, 1, 1, 2, 3)\n#' )\n#'\n#' data_unique(df1, select = \"id\")\n#' @export\ndata_unique <- function(\n  data,\n  select = NULL,\n  keep = \"best\",\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  UseMethod(\"data_unique\")\n}\n\n\n#' @export\ndata_unique.data.frame <- function(\n  data,\n  select = NULL,\n  keep = \"best\",\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  select <- .select_nse(\n    select,\n    data,\n    exclude = exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # temporary_id <- paste0(sample(letters), collapse = \"\")\n  data$temporary_id2 <- do.call(paste, c(data_select(data, select), sep = \"_\"))\n\n  og.names <- names(data)\n  dups <- data_duplicated(data, select)\n\n  # if no duplicates, return the original data\n  if (nrow(dups) == 0L) {\n    data <- data_remove(data, \"temporary_id2\")\n    return(data)\n  }\n\n  # count number of duplicates\n  
dups.n <- sum(duplicated(dups$temporary_id2))\n  good.dups <- data_group(dups, \"temporary_id2\")\n\n  # keep row that has the least duplicates\n  if (keep == \"best\") {\n    good.dups <- data_filter(good.dups, \"count_na == min(count_na)\")\n  }\n\n  good.dups <- good.dups[\n    !duplicated(good.dups$temporary_id2, fromLast = keep == \"last\"),\n  ]\n\n  good.dups <- data_select(good.dups, og.names)\n  out <- data[!duplicated(data$temporary_id2), , drop = FALSE]\n\n  if (keep != \"first\") {\n    match.index <- out$temporary_id2 %in% good.dups$temporary_id2\n    out[match.index, ] <- good.dups\n  }\n\n  # id is not useful anymore\n  out <- data_remove(out, \"temporary_id2\")\n\n  if (verbose) {\n    dup.msg <- sprintf(\n      \"(%s duplicates removed, with method '%s')\",\n      dups.n,\n      keep\n    )\n    dup.msg <- paste0(dup.msg, ifelse(dups.n != 69, \"\", \" 69... nice\"))\n    insight::format_alert(dup.msg)\n  }\n\n  out\n}\n\n\n#' @export\ndata_unique.grouped_df <- function(\n  data,\n  select = NULL,\n  keep = \"best\",\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  select <- .select_nse(\n    select,\n    data,\n    exclude = exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  grps <- attr(data, \"groups\", exact = TRUE)\n  grps <- grps[[\".rows\"]]\n\n  data2 <- as.data.frame(data_ungroup(data))\n\n  out <- lapply(grps, function(x) {\n    data_unique.data.frame(\n      data2[x, ],\n      select = select,\n      keep = keep,\n      verbose = verbose\n    )\n  })\n\n  out <- do.call(rbind, out)\n\n  if (!insight::object_has_rownames(data)) {\n    rownames(out) <- NULL\n  }\n\n  class(out) <- class(data)\n  attr(out, \"groups\") <- attr(data, \"groups\")\n\n  out\n}\n"
  },
  {
    "path": "R/data_unite.R",
    "content": "#' @title Unite (\"merge\") multiple variables\n#' @name data_unite\n#'\n#' @description\n#' Merge values of multiple variables per observation into one new variable.\n#'\n#' @param data A data frame.\n#' @param new_column The name of the new column, as a string.\n#' @param separator A character to use between values.\n#' @param append Logical, if `FALSE` (default), removes original columns that\n#' were united. If `TRUE`, all columns are preserved and the new column is\n#' appended to the data frame.\n#' @param remove_na Logical, if `TRUE`, missing values (`NA`) are not included\n#' in the united values. If `FALSE`, missing values are represented as `\"NA\"`\n#' in the united values.\n#' @param ... Currently not used.\n#' @inheritParams extract_column_names\n#'\n#' @seealso [`data_separate()`]\n#'\n#' @return `data`, with a newly created variable.\n#'\n#' @examples\n#' d <- data.frame(\n#'   x = 1:3,\n#'   y = letters[1:3],\n#'   z = 6:8\n#' )\n#' d\n#' data_unite(d, new_column = \"xyz\")\n#' data_unite(d, new_column = \"xyz\", remove = FALSE)\n#' data_unite(d, new_column = \"xyz\", select = c(\"x\", \"z\"))\n#' data_unite(d, new_column = \"xyz\", select = c(\"x\", \"z\"), append = TRUE)\n#' @export\ndata_unite <- function(\n  data,\n  new_column = NULL,\n  select = NULL,\n  exclude = NULL,\n  separator = \"_\",\n  append = FALSE,\n  remove_na = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n) {\n  # we need a name for the new column\n  if (is.null(new_column)) {\n    insight::format_error(\n      \"No name for the new column was provided.\",\n      \"Please use `new_column` to define a name for the newly created column.\"\n    )\n  }\n\n  # only one column name\n  if (length(new_column) > 1) {\n    insight::format_error(\n      \"Please provide only a single string for `new_column`, no character vector with multiple values.\"\n    )\n  }\n\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n  
  select,\n    data,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  if (is.null(select) || length(select) <= 1) {\n    insight::format_error(\n      \"At least two columns in `select` are required for `data_unite()`.\"\n    )\n  }\n\n  # unite\n  out <- data.frame(\n    new_col = do.call(paste, c(data[select], sep = separator)),\n    stringsAsFactors = FALSE\n  )\n  colnames(out) <- new_column\n\n  # remove missings\n  if (remove_na) {\n    # remove trailing and leading \"NA_\" and \"_NA\"\n    out[[new_column]] <- gsub(paste0(\"^NA\", separator), \"\", out[[new_column]])\n    out[[new_column]] <- gsub(paste0(separator, \"NA$\"), \"\", out[[new_column]])\n    # remove _NA_ inside string, add separator back. This ensure we match\n    # whole-word NA and do not break strings like \"COUNTRY_NATION\"\n    out[[new_column]] <- gsub(\n      paste0(separator, \"NA\", separator),\n      separator,\n      out[[new_column]],\n      fixed = TRUE\n    )\n  }\n\n  # remove old columns\n  if (!isTRUE(append)) {\n    data[select] <- NULL\n  }\n\n  # overwrite?\n  if (new_column %in% colnames(data) && verbose) {\n    insight::format_alert(\n      \"The name for `new_column` already exists as variable name in the data.\",\n      \"This variable will be replaced by `new_column`.\"\n    )\n  }\n\n  # overwrite or append\n  data[[new_column]] <- out[[new_column]]\n\n  # fin\n  data\n}\n"
  },
  {
    "path": "R/data_write.R",
    "content": "#' @param data The data frame that should be written to a file.\n#' @param delimiter For CSV-files, specifies the delimiter. Defaults to `\",\"`,\n#'   but in particular in European regions, `\";\"` might be a useful alternative,\n#'   especially when exported CSV-files should be opened in Excel.\n#' @param save_labels Only applies to CSV files. If `TRUE`, value and variable\n#'   labels (if any) will be saved as additional CSV file. This file has the same\n#'   file name as the exported CSV file, but includes a `\"_labels\"` suffix (i.e.\n#'   when the file name is `\"mydat.csv\"`, the additional file with value and\n#'   variable labels is named `\"mydat_labels.csv\"`).\n#' @rdname data_read\n#' @export\ndata_write <- function(\n  data,\n  path,\n  delimiter = \",\",\n  convert_factors = FALSE,\n  save_labels = FALSE,\n  password = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # check file type, so we know the target dta format\n  file_type <- .file_ext(path)\n  type <- switch(\n    file_type,\n    txt = ,\n    csv = \"csv\",\n    sav = ,\n    por = \"spss\",\n    zsav = \"zspss\",\n    dta = \"stata\",\n    xpt = \"sas\",\n    rds = \"rds\",\n    rda = ,\n    rdata = \"rda\",\n    parquet = \"parquet\",\n    \"unknown\"\n  )\n\n  # no file type provided?\n  if (!is.character(file_type) || file_type == \"\") {\n    insight::format_error(\n      \"Could not detect file type. 
The `path` argument has no file extension.\",\n      \"Please provide a file path including extension, like \\\"myfile.csv\\\" or \\\"c:/Users/Default/myfile.sav\\\".\"\n    )\n  }\n\n  if (type %in% c(\"csv\", \"unknown\")) {\n    .write_csv_or_unknown(\n      data,\n      path,\n      type,\n      delimiter,\n      convert_factors,\n      save_labels,\n      password,\n      verbose,\n      ...\n    )\n  } else if (type == \"rds\") {\n    .write_rds(data, path, password, verbose, ...)\n  } else if (type == \"rda\") {\n    .write_rda(data, path, password, verbose, ...)\n  } else if (type == \"parquet\") {\n    .write_parquet(data, path, password, verbose, ...)\n  } else {\n    .write_haven(data, path, password, verbose, type, ...)\n  }\n}\n\n\n# base R formats -----\n\n.write_rds <- function(data, path, password, verbose = TRUE, ...) {\n  # encrypt data\n  data <- .data_encryption(data, password)\n  saveRDS(data, path, ...)\n}\n\n.write_rda <- function(data, path, password, verbose = TRUE, ...) {\n  # encrypt data\n  data <- .data_encryption(data, password)\n  # save single data frame\n  if (is.data.frame(data) || is.raw(data)) {\n    save(data, file = path, ...)\n  } else {\n    # save list of data frames\n    env <- as.environment(data)\n    save(list = names(data), file = path, envir = env, ...)\n  }\n}\n\n\n# nanoparquet -----\n\n.write_parquet <- function(data, path, password, verbose = TRUE, ...) 
# Export `data` as CSV (via readr) or, for unrecognized extensions, delegate
# to `rio::export()`. Optionally writes a companion labels file and converts
# labelled values to factors beforehand. Encryption is not available here.
.write_csv_or_unknown <- function(
  data,
  path,
  type = "csv",
  delimiter = ",",
  convert_factors = FALSE,
  save_labels = FALSE,
  password = NULL,
  verbose = TRUE,
  ...
) {
  # data encryption requires a data frame *with attributes* to be saved, which
  # is not possible for text or raw formats - thus, no encryption here
  if (!is.null(password)) {
    insight::format_error(
      "Data encryption is not supported for CSV or text files."
    )
  }

  is_csv <- type == "csv"

  # variable/value labels can be exported into a companion file (CSV only)
  if (is_csv && save_labels) {
    data <- .save_labels_to_file(data, path, delimiter, verbose)
  }

  # this might make sense when writing labelled data to CSV
  if (convert_factors) {
    data <- .pre_process_exported_data(data, convert_factors)
  }

  # unknown file extensions are handed over to rio
  if (!is_csv) {
    insight::check_if_installed("rio")
    return(rio::export(x = data, file = path, ...))
  }

  # comma delimiter -> write_csv(), anything else -> write_csv2()
  insight::check_if_installed("readr")
  csv_writer <- if (delimiter == ",") readr::write_csv else readr::write_csv2
  csv_writer(x = data, file = path, ...)
}
)\n  }\n\n  insight::check_if_installed(\"haven\")\n\n  # check if user provided \"compress\" argument for SPSS files,\n  # else, default to compression\n  dots <- list(...)\n  if (!is.null(dots$compress)) {\n    compress <- dots$compress\n  } else if (identical(type, \"zspss\")) {\n    compress <- \"zsav\"\n  } else {\n    compress <- \"byte\"\n  }\n\n  # labelled data needs \"labelled\" class attributes\n  data <- .set_haven_class_attributes(data, verbose)\n\n  # fix invalid column names\n  data <- .fix_column_names(data, verbose)\n\n  if (type %in% c(\"spss\", \"zspss\")) {\n    # write to SPSS\n    haven::write_sav(data = data, path = path, compress = compress)\n  } else if (type == \"stata\") {\n    # write to Stata\n    haven::write_dta(data = data, path = path, ...)\n  } else {\n    # write to SAS\n    haven::write_xpt(data = data, path = path, ...)\n  }\n}\n\n\n# helper -------------------------------\n\n# make sure we have the \"labelled\" class for labelled data\n.set_haven_class_attributes <- function(x, verbose = TRUE) {\n  insight::check_if_installed(\"haven\")\n\n  if (verbose) {\n    insight::format_alert(\"Preparing data file: converting variable types.\")\n  }\n  x[] <- lapply(x, function(i) {\n    # make sure we have labelled class for labelled data\n    value_labels <- attr(i, \"labels\", exact = TRUE)\n    variable_label <- attr(i, \"label\", exact = TRUE)\n    # factor requires special preparation to save levels as labels\n    # haven:::vec_cast_named requires \"x\" and \"labels\" to be of same type\n    if (is.factor(i)) {\n      haven::labelled(\n        x = as.numeric(i),\n        labels = stats::setNames(seq_along(levels(i)), levels(i)),\n        label = variable_label\n      )\n    } else if (!is.null(value_labels) || !is.null(variable_label)) {\n      # character requires special preparation to save value labels\n      # haven:::vec_cast_named requires \"x\" and \"labels\" to be of same type\n      if (is.character(i)) {\n        # only 
# Packages like SPSS cannot deal with variables whose names end with a dot;
# repair such names by appending a "fix" suffix, optionally alerting the user.
.fix_column_names <- function(x, verbose = TRUE) {
  # which column names end in a dot? (vectorized check over all names)
  ends_in_dot <- endsWith(colnames(x), ".")

  if (any(ends_in_dot)) {
    if (verbose) {
      insight::format_alert(
        "Found and fixed invalid column names so they can be read by other software packages."
      )
    }
    colnames(x)[ends_in_dot] <- paste0(colnames(x)[ends_in_dot], "fix")
  }

  x
}
if (is.null(l)) {\n        \"\"\n      } else {\n        paste0(l, \"=\", names(l), collapse = \"; \")\n      }\n    },\n    character(1)\n  )\n\n  out <- data.frame(\n    variable = colnames(x),\n    label = var_labs,\n    labels = value_labs,\n    stringsAsFactors = FALSE\n  )\n\n  if (delimiter == \",\") {\n    readr::write_csv(x = out, file = path)\n  } else {\n    readr::write_csv2(x = out, file = path)\n  }\n}\n\n\n# process data for export, use factor levels as data values -------------------\n\n.pre_process_exported_data <- function(x, convert_factors) {\n  # user may decide whether we automatically detect variable type or not\n  if (isTRUE(convert_factors)) {\n    x[] <- lapply(x, function(i) {\n      # only proceed if not all missing\n      if (!all(is.na(i))) {\n        # save labels\n        value_labels <- attr(i, \"labels\", exact = TRUE)\n        variable_labels <- attr(i, \"label\", exact = TRUE)\n\n        # filter, so only matching value labels remain\n        value_labels <- value_labels[value_labels %in% unique(i)]\n\n        # guess variable type\n        if (is.character(i)) {\n          # we need this to drop haven-specific class attributes\n          i <- as.character(i)\n        } else if (\n          !is.null(value_labels) && length(value_labels) == insight::n_unique(i)\n        ) {\n          # if all values are labelled, we assume factor. 
# Ensure the encryption password is usable: a single, non-empty character
# string. Errors (via insight) otherwise; returns NULL invisibly when valid.
.validate_password <- function(password) {
  # short-circuit order matters: the length check must run before nzchar(),
  # so nzchar() is never applied to a vector of length != 1
  is_valid <- is.character(password) &&
    length(password) == 1L &&
    nzchar(password)

  if (!is_valid) {
    insight::format_error(
      "The password must be a single non-empty character string."
    )
  }
}
  },
  {
    "path": "R/data_xtabulate.R",
    "content": "# helper to compute crosstables --------------\n\n.crosstable <- function(\n  x,\n  by,\n  weights = NULL,\n  remove_na = FALSE,\n  proportions = NULL,\n  obj_name = NULL,\n  group_variable = NULL\n) {\n  if (!is.null(proportions)) {\n    proportions <- match.arg(proportions, c(\"row\", \"column\", \"full\"))\n  }\n  # frequency table\n  if (is.null(weights)) {\n    # we have a `.default` and a `.data.frame` method for `data_tabulate()`.\n    # since this is the default, `x` can be an object which cannot be used\n    # with `table()`, that's why we add `tryCatch()` here. Below we give an\n    # informative error message for non-supported objects.\n    if (remove_na) {\n      x_table <- tryCatch(table(x, by), error = function(e) NULL)\n    } else {\n      x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)\n    }\n  } else if (remove_na) {\n    # weighted frequency table, excluding NA\n    x_table <- tryCatch(\n      stats::xtabs(\n        weights ~ x + by,\n        data = data.frame(weights = weights, x = x, by = by),\n        na.action = stats::na.omit,\n        addNA = FALSE\n      ),\n      error = function(e) NULL\n    )\n  } else {\n    # weighted frequency table, including NA\n    x_table <- tryCatch(\n      stats::xtabs(\n        weights ~ x + by,\n        data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),\n        na.action = stats::na.pass,\n        addNA = TRUE\n      ),\n      error = function(e) NULL\n    )\n  }\n\n  if (is.null(x_table)) {\n    insight::format_warning(paste0(\n      \"Can't compute cross tables for objects of class `\",\n      class(x)[1],\n      \"`.\"\n    ))\n    return(NULL)\n  }\n\n  out <- as.data.frame(stats::ftable(x_table))\n  colnames(out) <- c(\"Value\", \"by\", \"N\")\n  total_n <- sum(out$N, na.rm = TRUE)\n\n  # we want to round N for weighted frequencies\n  if (!is.null(weights)) {\n    out$N <- round(out$N)\n    total_n <- round(total_n)\n  }\n\n  out <- 
# Build a table of proportions matching a frequency cross table. The kind of
# proportion ("row", "column" or "full") is read from the "proportions"
# attribute of `x`; the grand total for "full" comes from the "total_n"
# attribute. Returns `NULL` when no proportions were requested.
.prop_table <- function(x) {
  prop_type <- attr(x, "proportions", exact = TRUE)
  if (is.null(prop_type)) {
    return(NULL)
  }

  # proportions are only computed for the numeric (frequency) columns
  is_num <- vapply(x, is.numeric, logical(1))
  # grand total of the table, used for "full" proportions
  n_total <- attr(x, "total_n", exact = TRUE)

  shares <- switch(
    prop_type,
    # each cell divided by its row sum
    row = lapply(seq_len(nrow(x)), function(i) {
      denom <- sum(x[i, is_num], na.rm = TRUE)
      if (denom == 0) {
        # guard against division by zero: emit a zero-row, keeping the
        # column names identical so the later rbind works
        zero_row <- as.data.frame(as.list(rep(0, sum(is_num))))
        colnames(zero_row) <- colnames(x)[is_num]
        zero_row
      } else {
        x[i, is_num] / denom
      }
    }),
    # each cell divided by its column sum
    column = lapply(which(is_num), function(i) {
      denom <- sum(x[, i], na.rm = TRUE)
      if (denom == 0) {
        rep(0, nrow(x))
      } else {
        x[, i] / denom
      }
    }),
    # each cell divided by the grand total
    full = lapply(which(is_num), function(i) {
      if (n_total == 0) {
        rep(0, nrow(x))
      } else {
        x[, i] / n_total
      }
    })
  )

  if (is.null(shares)) {
    return(NULL)
  }

  # row proportions were collected row-wise, the other two column-wise
  out <- if (identical(prop_type, "row")) {
    as.data.frame(do.call(rbind, shares))
  } else {
    as.data.frame(do.call(cbind, shares))
  }
  colnames(out) <- colnames(x)[is_num]

  if (nrow(out) == nrow(x)) {
    if (isFALSE(is_num[1])) {
      # a non-numeric first column holds the value labels - reuse them as
      # row names; NA labels must become the string "NA" to be valid names
      labels <- as.character(x[[1]])
      labels[is.na(labels)] <- "NA"
      rownames(out) <- labels
    } else {
      rownames(out) <- rownames(x)
    }
  }

  out
}
sub-table. For multiple, collapsed table\n  # (i.e. when length of `by` > 1), we don't want multiple total rows in the\n  # table, so we would set include_total_row = FALSE for objects of class\n  # `datawizard_crosstabs` (note plural s!)\n  if (include_total_row) {\n    # for text format, insert \"empty row\" before last total row\n    if (identical(format, \"text\") || identical(format, \"markdown\")) {\n      empty_row <- as.data.frame(t(data.frame(\n        rep(\"\", ncol(ftab)),\n        c(\"Total\", as.character(total_row)),\n        stringsAsFactors = FALSE\n      )))\n    } else {\n      empty_row <- as.data.frame(t(data.frame(\n        c(\"Total\", as.character(total_row)),\n        stringsAsFactors = FALSE\n      )))\n    }\n    colnames(empty_row) <- colnames(ftab)\n    ftab <- rbind(ftab, empty_row)\n    ftab[nrow(ftab), ] <- gsub(\"\\\\.00$\", \"\", ftab[nrow(ftab), ])\n  }\n\n  # insert big marks?\n  ftab$Total <- .add_commas_in_numbers(ftab$Total, big_mark)\n  ftab[nrow(ftab), ] <- .add_commas_in_numbers(ftab[nrow(ftab), ], big_mark)\n\n  # also format NA column name\n  colnames(ftab)[colnames(ftab) == \"NA\"] <- ifelse(\n    identical(format, \"text\"),\n    \"<NA>\",\n    \"(NA)\"\n  )\n\n  ftab\n}\n\n\n# print, datawizard_crosstab ---------------------\n\n#' @export\nprint.datawizard_crosstab <- function(x, big_mark = NULL, ...) {\n  .print_text_table(x, big_mark, format = \"text\", ...)\n  invisible(x)\n}\n\n\n#' @export\nprint_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {\n  .print_text_table(x, big_mark, format = \"markdown\", ...)\n}\n\n\n#' @export\nprint_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {\n  .print_text_table(x, big_mark, format = \"html\", ...)\n}\n\n\n.print_text_table <- function(x, big_mark = NULL, format = \"text\", ...) {\n  # grouped data? 
if yes, add information on grouping factor\n  if (is.null(x[[\"Group\"]])) {\n    caption <- NULL\n  } else {\n    caption <- paste0(\"Grouped by \", x[[\"Group\"]][1])\n    x$Group <- NULL\n  }\n\n  # this function is used by all four supported format, markdown, text, html\n  # and tt (tinytable). For tt, we sometimes have format \"html\" and backend = \"tt\",\n  # so we need to check for this special case\n  backend <- switch(format, html = , tt = .check_format_backend(...), format)\n  # prepare table arguments\n  fun_args <- list(\n    format(x, big_mark = big_mark, format = format, ...),\n    caption = caption,\n    format = backend\n  )\n  if (!format %in% c(\"html\", \"tt\")) {\n    fun_args$cross <- \"+\"\n    fun_args$empty_line <- \"-\"\n  }\n  if (format == \"text\") {\n    fun_args$missing <- \"<NA>\"\n  } else {\n    fun_args$missing <- \"(NA)\"\n  }\n  out <- do.call(insight::export_table, c(fun_args, list(...)))\n\n  # print table\n  if (identical(format, \"text\")) {\n    cat(out)\n  } else {\n    out\n  }\n}\n\n\n# print, datawizard_crosstabs ---------------------\n\n#' @export\nprint.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {\n  .print_text_tables(x, big_mark, format = \"text\", ...)\n  invisible(x)\n}\n\n\n#' @export\nprint_md.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {\n  .print_text_tables(x, big_mark, format = \"markdown\", ...)\n}\n\n\n#' @export\nprint_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {\n  .print_text_tables(x, big_mark, format = \"html\", ...)\n}\n\n\n.print_text_tables <- function(x, big_mark = NULL, format = \"text\", ...) {\n  if (length(x) == 1) {\n    .print_text_table(x[[1]], big_mark = big_mark, format = format, ...)\n  } else {\n    x <- lapply(x, function(i) {\n      # grouped data? 
if yes, add information on grouping factor\n      if (!is.null(i[[\"Group\"]])) {\n        i$groups <- paste0(\"Grouped by \", i[[\"Group\"]][1])\n        i$Group <- NULL\n      }\n      # if we don't have the gt-grouping variable \"groups\" yet, we use it now\n      # for grouping. Else, we use a new column named \"Variable\", to avoid\n      # overwriting the groups-variable from grouped data frames\n      if (is.null(i$groups) && identical(format, \"html\")) {\n        grp_variable <- \"groups\"\n      } else {\n        grp_variable <- \"Variable\"\n      }\n      # first variable differs for each data frame, so we harmonize it here\n      i[[grp_variable]] <- colnames(i)[1]\n      colnames(i)[1] <- \"Value\"\n      # move column to first position\n      i <- data_relocate(i, select = grp_variable, before = 1)\n      # format data frame\n      format(\n        i,\n        format = format,\n        big_mark = big_mark,\n        include_total_row = FALSE,\n        ...\n      )\n    })\n    # now bind, but we need to check for equal number of columns\n    if (all(lengths(x) == max(length(x)))) {\n      out <- do.call(rbind, x)\n    } else {\n      # if not all tables have identical columns, we can use \"data_merge()\",\n      # which safely row-binds all data frames. However, the column order can be\n      # messed up, so we save column order here and restore it later\n      col_order <- colnames(x[[which.max(lengths(x))]])\n      out <- data_merge(x, join = \"bind\")[col_order]\n    }\n\n    # split tables for grouped data frames\n    if (!is.null(out$groups)) {\n      out <- split(out, out$groups)\n      out <- lapply(out, function(subtable) {\n        # for text and markdown, if we split tables, we remove the \"groups\"\n        # variable. 
we need to keep it for HTML tables.\n        if (!identical(format, \"html\")) {\n          attr(subtable, \"table_caption\") <- c(unique(subtable$groups), \"blue\")\n          subtable$groups <- NULL\n        }\n        # remove duplicated names\n        for (grpvars in c(\"Variable\", \"Group\")) {\n          if (!is.null(subtable[[grpvars]])) {\n            subtable[[grpvars]][duplicated(subtable[[grpvars]])] <- \"\"\n          }\n        }\n        subtable\n      })\n      # no splitting of grouped data frames into list for HTML format,\n      # because splitting is done by the `by` argument later\n      if (identical(format, \"html\")) {\n        out <- do.call(rbind, out)\n      }\n    }\n\n    # prepare table arguments\n    fun_args <- list(\n      out,\n      format = format\n    )\n    if (format != \"html\") {\n      fun_args$cross <- \"+\"\n      fun_args$empty_line <- \"-\"\n    } else {\n      fun_args$by <- \"groups\"\n    }\n    if (format == \"text\") {\n      fun_args$missing <- \"<NA>\"\n    } else {\n      fun_args$missing <- \"(NA)\"\n    }\n    out <- do.call(insight::export_table, c(fun_args, list(...)))\n\n    # print table\n    if (identical(format, \"text\")) {\n      cat(out)\n    } else {\n      out\n    }\n  }\n}\n\n\n# helper ---------------------\n\n.validate_by <- function(by, x) {\n  if (!is.null(by)) {\n    if (is.character(by)) {\n      # If \"by\" is a character string, must be of length 1\n      if (length(by) > 1) {\n        insight::format_error(\n          \"If `by` is a string indicating a variable name, `by` must be of length 1.\",\n          \"You may use `data_group()` to group by multiple variables, then call `data_tabulate()`.\"\n        )\n      }\n      # if \"by\" is a character, \"x\" must be a data frame\n      if (!is.data.frame(x)) {\n        insight::format_error(\n          \"If `by` is a string indicating a variable name, `x` must be a data frame.\"\n        )\n      }\n      # is \"by\" a column in \"x\"?\n   
   if (!by %in% colnames(x)) {\n        insight::format_error(sprintf(\n          \"The variable specified in `by` was not found in `x`. %s\",\n          .misspelled_string(names(x), by, \"Possibly misspelled?\")\n        ))\n      }\n      by <- x[[by]]\n    }\n    # is \"by\" of same length as \"x\"?\n    if (is.data.frame(x) && length(by) != nrow(x)) {\n      insight::format_error(\n        \"Length of `by` must be equal to number of rows in `x`.\"\n      ) # nolint\n    }\n    if (!is.data.frame(x) && length(by) != length(x)) {\n      insight::format_error(\"Length of `by` must be equal to length of `x`.\") # nolint\n    }\n    if (!is.factor(by)) {\n      # coerce \"by\" to factor, including labels\n      by <- to_factor(by, labels_to_levels = TRUE, verbose = FALSE)\n    }\n  }\n\n  by\n}\n\n\n.validate_table_weights <- function(weights, x, weights_expression = NULL) {\n  # exception: for vectors, if weighting variable not found, \"weights\" is NULL.\n  # to check this, we further need to check whether a weights expression was\n  # provided, e.g. 
\"weights = iris$not_found\" - all this is only relevant when\n  # weights is NULL\n  if (is.null(weights)) {\n    # possibly misspelled weights-variables for default-method ----------------\n    # -------------------------------------------------------------------------\n\n    # do we have any value for weights_expression?\n    if (\n      !is.null(weights_expression) &&\n        # due to deparse() and substitute, NULL becomes \"NULL\" - we need to check for this\n        !identical(weights_expression, \"NULL\") &&\n        # we should only run into this problem, when a variable from a data frame\n        # is used in the data_tabulate() method for vectors - thus, we need to check\n        # whether the weights_expression contains a \"$\" - `iris$not_found` is \"NULL\"\n        # we need this check, because the default-method of data_tabulate() is called\n        # from the data.frame method, where `weights = weights`, and then,\n        # deparse(substitute(weights)) is \"weights\" (not \"NULL\" or \"iris$not_found\"),\n        # leading to an error when actually all is OK (if \"weights\" is NULL)\n        # Example:\n        #> efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n        # Here, efc$wweight is NULL\n        #> data_tabulate(efc$c172code, weights = efc$wweight)\n        # Here, wweight errors anyway, because object \"wweight\" is not found\n        #> data_tabulate(efc$c172code, weights = wweight)\n        grepl(\"$\", weights_expression, fixed = TRUE)\n    ) {\n      insight::format_error(\n        \"The variable specified in `weights` was not found. 
Possibly misspelled?\"\n      )\n    }\n  } else {\n    # possibly misspecified weights-variables for data.frame-method -----------\n    # -------------------------------------------------------------------------\n\n    if (is.character(weights)) {\n      # If \"weights\" is a character string, must be of length 1\n      if (length(weights) > 1) {\n        insight::format_error(\n          \"If `weights` is a string indicating a variable name, `weights` must be of length 1.\"\n        )\n      }\n      # if \"weights\" is a character, \"x\" must be a data frame\n      if (!is.data.frame(x)) {\n        insight::format_error(\n          \"If `weights` is a string indicating a variable name, `x` must be a data frame.\"\n        ) # nolint\n      }\n      # is \"by\" a column in \"x\"?\n      if (!weights %in% colnames(x)) {\n        insight::format_error(sprintf(\n          \"The variable specified in `weights` was not found in `x`. %s\",\n          .misspelled_string(names(x), weights, \"Possibly misspelled?\")\n        ))\n      }\n      weights <- x[[weights]]\n    }\n    # is \"by\" of same length as \"x\"?\n    if (is.data.frame(x) && length(weights) != nrow(x)) {\n      insight::format_error(\n        \"Length of `weights` must be equal to number of rows in `x`.\"\n      ) # nolint\n    }\n    if (!is.data.frame(x) && length(weights) != length(x)) {\n      insight::format_error(\n        \"Length of `weights` must be equal to length of `x`.\"\n      ) # nolint\n    }\n  }\n\n  weights\n}\n"
  },
  {
    "path": "R/datawizard-package.R",
    "content": "#' `datawizard`\n#'\n#' @title datawizard: Easy Data Wrangling and Statistical Transformations\n#'\n#' @description\n#'\n#' A lightweight package to assist in key steps involved in any data analysis\n#' workflow:\n#'\n#'  - wrangling the raw data to get it in the needed form,\n#'  - applying preprocessing steps and statistical transformations, and\n#'  - compute statistical summaries of data properties and distributions.\n#'\n#' It is also the data wrangling backend for packages in 'easystats' ecosystem.\n#' Reference: Patil et al. (2022) \\doi{10.21105/joss.04684}.\n#'\n#' @docType package\n#' @aliases datawizard datawizard-package\n#' @name datawizard-package\n#' @keywords internal\n\"_PACKAGE\"\n"
  },
  {
    "path": "R/demean.R",
    "content": "#' Compute group-meaned and de-meaned variables\n#'\n#' @description\n#'\n#' `demean()` computes group- and de-meaned versions of a variable that can be\n#' used in regression analysis to model the between- and within-subject effect\n#' (person-mean centering or centering within clusters). `degroup()` is more\n#' generic in terms of the centering-operation. While `demean()` always uses\n#' mean-centering, `degroup()` can also use the mode or median for centering.\n#'\n#' @param x A data frame.\n#' @param select Character vector (or formula) with names of variables to select\n#'   that should be group- and de-meaned.\n#' @param by Character vector (or formula) with the name of the variable that\n#'   indicates the group- or cluster-ID. For cross-classified or nested designs,\n#'   `by` can also identify two or more variables as group- or cluster-IDs. If\n#'   the data is nested and should be treated as such, set `nested = TRUE`. Else,\n#'   if `by` defines two or more variables and `nested = FALSE`, a cross-classified\n#'   design is assumed. Note that `demean()` and `degroup()` can't handle a mix\n#'   of nested and cross-classified designs in one model.\n#'\n#'   For nested designs, `by` can be:\n#'   - a character vector with the name of the variable that indicates the\n#'     levels, ordered from *highest* level to *lowest* (e.g.\n#'     `by = c(\"L4\", \"L3\", \"L2\")`.\n#'   - a character vector with variable names in the format `by = \"L4/L3/L2\"`,\n#'     where the levels are separated by `/`.\n#'\n#'   See also section _De-meaning for cross-classified designs_ and\n#'   _De-meaning for nested designs_ below.\n#' @param nested Logical, if `TRUE`, the data is treated as nested. If `FALSE`,\n#'   the data is treated as cross-classified. Only applies if `by` contains more\n#'   than one variable.\n#' @param center Method for centering. 
`demean()` always performs\n#'   mean-centering, while `degroup()` can use `center = \"median\"` or\n#'   `center = \"mode\"` for median- or mode-centering, and also `\"min\"`\n#'   or `\"max\"`.\n#' @param suffix_demean,suffix_groupmean String value, will be appended to the\n#'   names of the group-meaned and de-meaned variables of `x`. By default,\n#'   de-meaned variables will be suffixed with `\"_within\"` and\n#'   grouped-meaned variables with `\"_between\"`.\n#' @param append Logical, if `TRUE` (default), the group- and de-meaned\n#'   variables will be appended (column bind) to the original data `x`,\n#'   thus returning both the original and the de-/group-meaned variables.\n#' @param add_attributes Logical, if `TRUE`, the returned variables gain\n#'   attributes to indicate the within- and between-effects. This is only\n#'   relevant when printing `model_parameters()` - in such cases, the\n#'   within- and between-effects are printed in separated blocks.\n#' @inheritParams center\n#'\n#' @return\n#' A data frame with the group-/de-meaned variables, which get the suffix\n#' `\"_between\"` (for the group-meaned variable) and `\"_within\"` (for the\n#' de-meaned variable) by default. For cross-classified or nested designs,\n#' the name pattern of the group-meaned variables is the name of the centered\n#' variable followed by the name of the variable that indicates the related\n#' grouping level, e.g. `predictor_L3_between` and `predictor_L2_between`.\n#'\n#' @seealso If grand-mean centering (instead of centering within-clusters)\n#'   is required, see [`center()`]. See [`performance::check_group_variation()`]\n#'   to check for heterogeneity bias.\n#'\n#' @section Heterogeneity Bias:\n#'\n#' Mixed models include different levels of sources of variability, i.e.\n#' error terms at each level. 
When macro-indicators (or level-2 predictors,\n#' or higher-level units, or more general: *group-level predictors that\n#' **vary** within and across groups*) are included as fixed effects (i.e.\n#' treated as covariate at level-1), the variance that is left unaccounted for\n#' this covariate will be absorbed into the error terms of level-1 and level-2\n#' (_Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6._):\n#' \"Such covariates contain two parts: one that is specific to the higher-level\n#' entity that does not vary between occasions, and one that represents the\n#' difference between occasions, within higher-level entities\" (_Bell et al. 2015_).\n#' Hence, the error terms will be correlated with the covariate, which violates\n#' one of the assumptions of mixed models (iid, independent and identically\n#' distributed error terms). This bias is also called the *heterogeneity bias*\n#' (_Bell et al. 2015_). To resolve this problem, level-2 predictors used as\n#' (level-1) covariates should be separated into their \"within\" and \"between\"\n#' effects by \"de-meaning\" and \"group-meaning\": After demeaning time-varying\n#' predictors, \"at the higher level, the mean term is no longer constrained by\n#' Level 1 effects, so it is free to account for all the higher-level variance\n#' associated with that variable\" (_Bell et al. 2015_).\n#'\n#' @section Panel data and correlating fixed and group effects:\n#'\n#' `demean()` is intended to create group- and de-meaned variables for panel\n#' regression models (fixed effects models), or for complex\n#' random-effect-within-between models (see _Bell et al. 2015, 2018_), where\n#' group-effects (random effects) and fixed effects correlate (see\n#' _Bafumi and Gelman 2006_). This can happen, for instance, when analyzing\n#' panel data, which can lead to *Heterogeneity Bias*. 
To control for correlating\n#' predictors and group effects, it is recommended to include the group-meaned\n#' and de-meaned version of *time-varying covariates* (and group-meaned version\n#' of *time-invariant covariates* that are on a higher level, e.g. level-2\n#' predictors) in the model. By this, one can fit complex multilevel models for\n#' panel data, including time-varying predictors, time-invariant predictors and\n#' random effects.\n#'\n#' @section Why mixed models are preferred over fixed effects models:\n#'\n#' A mixed models approach can model the causes of endogeneity explicitly\n#' by including the (separated) within- and between-effects of time-varying\n#' fixed effects and including time-constant fixed effects. Furthermore,\n#' mixed models also include random effects, thus a mixed models approach\n#' is superior to classic fixed-effects models, which lack information of\n#' variation in the group-effects or between-subject effects. Furthermore,\n#' fixed effects regression cannot include random slopes, which means that\n#' fixed effects regressions are neglecting \"cross-cluster differences in the\n#' effects of lower-level controls (which) reduces the precision of estimated\n#' context effects, resulting in unnecessarily wide confidence intervals and\n#' low statistical power\" (_Heisig et al. 2017_).\n#'\n#' @section Terminology:\n#'\n#' The group-meaned variable is simply the mean of an independent variable\n#' within each group (or id-level or cluster) represented by `by`. It represents\n#' the cluster-mean of an independent variable. The regression coefficient of a\n#' group-meaned variable is the *between-subject-effect*. The de-meaned variable\n#' is then the centered version of the group-meaned variable. 
De-meaning is\n#' sometimes also called person-mean centering or centering within clusters.\n#' The regression coefficient of a de-meaned variable represents the\n#' *within-subject-effect*.\n#'\n#' @section De-meaning with continuous predictors:\n#'\n#' For continuous time-varying predictors, the recommendation is to include\n#' both their de-meaned and group-meaned versions as fixed effects, but not\n#' the raw (untransformed) time-varying predictors themselves. The de-meaned\n#' predictor should also be included as random effect (random slope). In\n#' regression models, the coefficient of the de-meaned predictors indicates\n#' the within-subject effect, while the coefficient of the group-meaned\n#' predictor indicates the between-subject effect.\n#'\n#' @section De-meaning with binary predictors:\n#'\n#' For binary time-varying predictors, there are two recommendations. First\n#' is to include the raw (untransformed) binary predictor as fixed effect\n#' only and the *de-meaned* variable as random effect (random slope).\n#' The alternative would be to add the de-meaned version(s) of binary\n#' time-varying covariates as additional fixed effect as well (instead of\n#' adding it as random slope). Centering time-varying binary variables to\n#' obtain within-effects (level 1) isn't necessary. They have a sensible\n#' interpretation when left in the typical 0/1 format (_Hoffmann 2015,\n#' chapter 8-2.I_). 
`demean()` will thus coerce categorical time-varying\n#' predictors to numeric to compute the de- and group-meaned versions for\n#' these variables, where the raw (untransformed) binary predictor and the\n#' de-meaned version should be added to the model.\n#'\n#' @section De-meaning of factors with more than 2 levels:\n#'\n#' Factors with more than two levels are demeaned in two ways: first, these\n#' are also converted to numeric and de-meaned; second, dummy variables\n#' are created (binary, with 0/1 coding for each level) and these binary\n#' dummy-variables are de-meaned in the same way (as described above).\n#' Packages like **panelr** internally convert factors to dummies before\n#' demeaning, so this behaviour can be mimicked here.\n#'\n#' @section De-meaning interaction terms:\n#'\n#' There are multiple ways to deal with interaction terms of within- and\n#' between-effects.\n#'\n#' - A classical approach is to simply use the product term of the de-meaned\n#'   variables (i.e. introducing the de-meaned variables as interaction term\n#'   in the model formula, e.g. `y ~ x_within * time_within`). This approach,\n#'   however, might be subject to bias (see _Giesselmann & Schmidt-Catran 2020_).\n#'\n#' - Another option is to first calculate the product term and then apply the\n#'   de-meaning to it. This approach produces an estimator \"that reflects\n#'   unit-level differences of interacted variables whose moderators vary\n#'   within units\", which is desirable if *no* within interaction of\n#'   two time-dependent variables is required. This is what `demean()` does\n#'   internally when `select` contains interaction terms.\n#'\n#' - A third option, when the interaction should result in a genuine within\n#'   estimator, is to \"double de-mean\" the interaction terms\n#'   (_Giesselmann & Schmidt-Catran 2018_), however, this is currently\n#'   not supported by `demean()`. 
If this is required, the `wmb()`\n#'   function from the **panelr** package should be used.\n#'\n#' To de-mean interaction terms for within-between models, simply specify\n#' the term as interaction for the `select`-argument, e.g. `select = \"a*b\"`\n#' (see 'Examples').\n#'\n#' @section De-meaning for cross-classified designs:\n#'\n#' `demean()` can handle cross-classified designs, where the data has two or\n#' more groups at the higher (i.e. second) level. In such cases, the\n#' `by`-argument can identify two or more variables that represent the\n#'  cross-classified group- or cluster-IDs. The de-meaned variables for\n#' cross-classified designs are simply subtracting all group means from each\n#' individual value, i.e. _fully cluster-mean-centering_ (see _Guo et al. 2024_\n#' for details). Note that de-meaning for cross-classified designs is *not*\n#' equivalent to de-meaning of nested data structures from models with three or\n#' more levels. Set `nested = TRUE` to explicitly assume a nested design. For\n#' cross-classified designs, de-meaning is supposed to work for models like\n#' `y ~ x + (1|level3) + (1|level2)`, but *not* for models like\n#' `y ~ x + (1|level3/level2)`. Note that `demean()` and `degroup()` can't\n#' handle a mix of nested and cross-classified designs in one model.\n#'\n#' @section De-meaning for nested designs:\n#'\n#' _Brincks et al. (2017)_ have suggested an algorithm to center variables for\n#' nested designs, which is implemented in `demean()`. For nested designs, set\n#' `nested = TRUE` *and* specify the variables that indicate the different\n#' levels in descending order in the `by` argument. E.g.,\n#' `by = c(\"level4\", \"level3, \"level2\")` assumes a model like\n#' `y ~ x + (1|level4/level3/level2)`. 
An alternative notation for the\n#' `by`-argument would be `by = \"level4/level3/level2\"`, similar to the\n#' formula notation.\n#'\n#' @section Analysing panel data with mixed models using lme4:\n#'\n#' A description of how to translate the formulas described in *Bell et al. 2018*\n#' into R using `lmer()` from **lme4** can be found in\n#' [this vignette](https://easystats.github.io/parameters/articles/demean.html).\n#'\n#' @references\n#'\n#'   - Bafumi J, Gelman A. 2006. Fitting Multilevel Models When Predictors\n#'     and Group Effects Correlate. In. Philadelphia, PA: Annual meeting of the\n#'     American Political Science Association.\n#'\n#'   - Bell A, Fairbrother M, Jones K. 2019. Fixed and Random Effects\n#'     Models: Making an Informed Choice. Quality & Quantity (53); 1051-1074\n#'\n#'   - Bell A, Jones K. 2015. Explaining Fixed Effects: Random Effects\n#'     Modeling of Time-Series Cross-Sectional and Panel Data. Political Science\n#'     Research and Methods, 3(1), 133–153.\n#'\n#'   - Brincks, A. M., Enders, C. K., Llabre, M. M., Bulotsky-Shearer, R. J.,\n#'     Prado, G., and Feaster, D. J. (2017). Centering Predictor Variables in\n#'     Three-Level Contextual Models. Multivariate Behavioral Research, 52(2),\n#'     149–163. https://doi.org/10.1080/00273171.2016.1256753\n#'\n#'   - Gelman A, Hill J. 2007. Data Analysis Using Regression and\n#'     Multilevel/Hierarchical Models. Analytical Methods for Social Research.\n#'     Cambridge, New York: Cambridge University Press\n#'\n#'   - Giesselmann M, Schmidt-Catran, AW. 2020. Interactions in fixed\n#'     effects regression models. Sociological Methods & Research, 1–28.\n#'     https://doi.org/10.1177/0049124120914934\n#'\n#'   - Guo Y, Dhaliwal J, Rights JD. 2024. Disaggregating level-specific effects\n#'     in cross-classified multilevel models. Behavior Research Methods, 56(4),\n#'     3023–3057.\n#'\n#'   - Heisig JP, Schaeffer M, Giesecke J. 2017. 
The Costs of Simplicity:\n#'     Why Multilevel Models May Benefit from Accounting for Cross-Cluster\n#'     Differences in the Effects of Controls. American Sociological Review 82\n#'     (4): 796–827.\n#'\n#'   - Hoffman L. 2015. Longitudinal analysis: modeling within-person\n#'     fluctuation and change. New York: Routledge\n#'\n#' @examples\n#'\n#' data(iris)\n#' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID\n#' iris$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable\n#'\n#' x <- demean(iris, select = c(\"Sepal.Length\", \"Petal.Length\"), by = \"ID\")\n#' head(x)\n#'\n#' x <- demean(iris, select = c(\"Sepal.Length\", \"binary\", \"Species\"), by = \"ID\")\n#' head(x)\n#'\n#'\n#' # demean interaction term x*y\n#' dat <- data.frame(\n#'   a = c(1, 2, 3, 4, 1, 2, 3, 4),\n#'   x = c(4, 3, 3, 4, 1, 2, 1, 2),\n#'   y = c(1, 2, 1, 2, 4, 3, 2, 1),\n#'   ID = c(1, 2, 3, 1, 2, 3, 1, 2)\n#' )\n#' demean(dat, select = c(\"a\", \"x*y\"), by = \"ID\")\n#'\n#' # or in formula-notation\n#' demean(dat, select = ~ a + x * y, by = ~ID)\n#'\n#' @export\ndemean <- function(\n  x,\n  select,\n  by,\n  nested = FALSE,\n  suffix_demean = \"_within\",\n  suffix_groupmean = \"_between\",\n  append = TRUE,\n  add_attributes = TRUE,\n  verbose = TRUE\n) {\n  degroup(\n    x = x,\n    select = select,\n    by = by,\n    nested = nested,\n    center = \"mean\",\n    suffix_demean = suffix_demean,\n    suffix_groupmean = suffix_groupmean,\n    append = append,\n    add_attributes = add_attributes,\n    verbose = verbose\n  )\n}\n\n\n#' @rdname demean\n#' @export\ndegroup <- function(\n  x,\n  select,\n  by,\n  nested = FALSE,\n  center = \"mean\",\n  suffix_demean = \"_within\",\n  suffix_groupmean = \"_between\",\n  append = TRUE,\n  add_attributes = TRUE,\n  verbose = TRUE\n) {\n  # ugly tibbles again... 
but save original data frame\n  original_data <- x\n  x <- .coerce_to_dataframe(x)\n\n  center <- match.arg(\n    tolower(center),\n    choices = c(\"mean\", \"median\", \"mode\", \"min\", \"max\")\n  )\n\n  if (inherits(select, \"formula\")) {\n    # formula to character, remove \"~\", split at \"+\". We don't use `all.vars()`\n    # here because we want to keep the interaction terms as they are\n    select <- trimws(unlist(\n      strsplit(\n        gsub(\"~\", \"\", insight::safe_deparse(select), fixed = TRUE),\n        \"+\",\n        fixed = TRUE\n      ),\n      use.names = FALSE\n    ))\n  }\n\n  # handle different \"by\" options\n  if (inherits(by, \"formula\")) {\n    by <- all.vars(by)\n  }\n\n  # we also allow lme4-syntax here: if by = \"L4/L3/L2\", we assume a nested design\n  if (length(by) == 1 && grepl(\"/\", by, fixed = TRUE)) {\n    by <- insight::trim_ws(unlist(\n      strsplit(by, \"/\", fixed = TRUE),\n      use.names = FALSE\n    ))\n    nested <- TRUE\n  }\n\n  # identify interaction terms\n  interactions_no <- select[!grepl(\"(\\\\*|\\\\:)\", select)]\n  interactions_yes <- select[grepl(\"(\\\\*|\\\\:)\", select)]\n\n  # if we have interaction terms that should be de-meaned, calculate the product\n  # of the terms first, then demean the product\n  if (length(interactions_yes)) {\n    interaction_terms <- lapply(\n      strsplit(interactions_yes, \"*\", fixed = TRUE),\n      trimws\n    )\n    product <- lapply(interaction_terms, function(i) do.call(`*`, x[, i]))\n    new_dat <- as.data.frame(stats::setNames(\n      product,\n      gsub(\"\\\\s\", \"\", gsub(\"*\", \"_\", interactions_yes, fixed = TRUE))\n    ))\n    x <- cbind(x, new_dat)\n    select <- c(interactions_no, colnames(new_dat))\n  }\n\n  # check if all variables are present\n  not_found <- setdiff(c(select, by), colnames(x))\n\n  if (length(not_found)) {\n    insight::format_error(\n      paste0(\n        \"Variable\",\n        ifelse(length(not_found) > 1, \"s \", \" \"),\n      
  text_concatenate(not_found, enclose = \"\\\"\"),\n        ifelse(length(not_found) > 1, \" were\", \" was\"),\n        \" not found in the dataset.\"\n      ),\n      .misspelled_string(\n        colnames(x),\n        not_found,\n        \"Possibly misspelled or not yet defined?\"\n      )\n    )\n  }\n\n  # get data to demean...\n  dat <- x[, c(select, by)]\n\n  # find categorical predictors that are coded as factors\n  categorical_predictors <- vapply(\n    dat[select],\n    is.factor,\n    FUN.VALUE = logical(1L)\n  )\n\n  # convert binary predictors to numeric\n  if (any(categorical_predictors)) {\n    # convert categorical to numeric, and then demean\n    dat[select[categorical_predictors]] <- lapply(\n      dat[select[categorical_predictors]],\n      function(i) as.numeric(i) - 1\n    )\n    # convert categorical to dummy, and demean each binary dummy\n    for (i in select[categorical_predictors]) {\n      if (nlevels(x[[i]]) > 2) {\n        for (j in levels(x[[i]])) {\n          # create vector with zeros\n          f <- rep(0, nrow(x))\n          # for each matching level, set dummy to 1\n          f[x[[i]] == j] <- 1\n          dummy <- data.frame(f)\n          # colnames = variable name + factor level\n          # also add new dummy variables to \"select\"\n          colnames(dummy) <- sprintf(\"%s_%s\", i, j)\n          select <- c(select, sprintf(\"%s_%s\", i, j))\n          # add to data\n          dat <- cbind(dat, dummy)\n        }\n      }\n    }\n    # tell user...\n    if (isTRUE(verbose)) {\n      insight::format_alert(\n        paste0(\n          \"Categorical predictors (\",\n          toString(names(categorical_predictors)[categorical_predictors]),\n          \") have been coerced to numeric values to compute de- and group-meaned variables.\\n\"\n        )\n      )\n    }\n  }\n\n  # group variables, then calculate the mean-value\n  # for variables within each group (the group means). 
assign\n  # mean values to a vector of same length as the data\n\n  gm_fun <- switch(\n    center,\n    mode = function(.gm) distribution_mode(stats::na.omit(.gm)),\n    median = function(.gm) stats::median(.gm, na.rm = TRUE),\n    min = function(.gm) min(.gm, na.rm = TRUE),\n    max = function(.gm) max(.gm, na.rm = TRUE),\n    function(.gm) mean(.gm, na.rm = TRUE)\n  )\n\n  # we allow disaggregating level-specific effects for cross-classified multilevel\n  # models (see Guo et al. 2024). Two levels should work as proposed by the authors,\n  # more levels also already work, but need to check the formula from the paper\n  # and validate results\n\n  if (length(by) == 1) {\n    # simple case: one level\n    group_means_list <- lapply(select, function(i) {\n      stats::ave(dat[[i]], dat[[by]], FUN = gm_fun)\n    })\n    names(group_means_list) <- select\n    # create de-meaned variables by subtracting the group mean from each individual value\n    person_means_list <- lapply(select, function(i) {\n      dat[[i]] - group_means_list[[i]]\n    })\n  } else if (nested) {\n    # nested design: by > 1, nested is explicitly set to TRUE\n    # We want:\n    # L3_between = xbar(k)\n    # L2_between = xbar(j,k) - xbar(k)\n    # L1_within = x(ijk) - xbar(jk)\n    # , where\n    # x(ijk) is the individual value / variable that is measured on level 1\n    # xbar(k) <- ave(x_ijk, L3, FUN = mean), the group mean of the variable at highest level\n    # xbar(jk) <- ave(x_ijk, L3, L2, FUN = mean), the group mean of the variable at second level\n    group_means_list <- lapply(select, function(i) {\n      out <- lapply(seq_along(by), function(k) {\n        stats::ave(dat[[i]], dat[, by[1:k], drop = FALSE], FUN = gm_fun)\n      })\n      # subtract mean of higher level from lower level\n      for (j in 2:length(by)) {\n        out[[j]] <- out[[j]] - out[[j - 1]]\n      }\n      names(out) <- paste0(i, \"_\", by)\n      out\n    })\n    # create de-meaned variables by subtracting the 
group mean from each individual value\n    person_means_list <- lapply(\n      # seq_along(select),\n      # function(i) dat[[select[i]]] - group_means_list[[i]][[length(by)]]\n      select,\n      function(i) {\n        dat[[i]] - stats::ave(dat[[i]], dat[, by, drop = FALSE], FUN = gm_fun)\n      }\n    )\n  } else {\n    # cross-classified design: by > 1\n    group_means_list <- lapply(by, function(j) {\n      out <- lapply(select, function(i) {\n        stats::ave(dat[[i]], dat[[j]], FUN = gm_fun)\n      })\n      names(out) <- paste0(select, \"_\", j)\n      out\n    })\n    group_means_list <- unlist(group_means_list, recursive = FALSE)\n\n    # de-meaned variables for cross-classified design is simply subtracting\n    # all group means from each individual value\n    person_means_list <- lapply(select, function(i) {\n      sum_group_means <- Reduce(\"+\", group_means_list[paste0(i, \"_\", by)])\n      dat[[i]] - sum_group_means\n    })\n  }\n\n  # preserve names\n  names(person_means_list) <- select\n\n  # convert to data frame and add suffix to column names\n\n  group_means <- as.data.frame(group_means_list)\n  person_means <- as.data.frame(person_means_list)\n\n  colnames(person_means) <- sprintf(\n    \"%s%s\",\n    colnames(person_means),\n    suffix_demean\n  )\n  colnames(group_means) <- sprintf(\n    \"%s%s\",\n    colnames(group_means),\n    suffix_groupmean\n  )\n\n  if (isTRUE(add_attributes)) {\n    person_means[] <- lapply(person_means, function(i) {\n      attr(i, \"within-effect\") <- TRUE\n      i\n    })\n    group_means[] <- lapply(group_means, function(i) {\n      attr(i, \"between-effect\") <- TRUE\n      i\n    })\n  }\n\n  # between and within effects\n  out <- cbind(group_means, person_means)\n\n  # append to original data?\n  if (isTRUE(append)) {\n    # check for unique column names\n    duplicated_columns <- intersect(colnames(out), colnames(original_data))\n    if (length(duplicated_columns)) {\n      insight::format_error(paste0(\n  
      \"One or more of the centered variables already exist in the original data frame: \", # nolint\n        text_concatenate(duplicated_columns, enclose = \"`\"),\n        \". Please rename the affected variable(s) in the original data, or use the arguments `suffix_demean` and `suffix_groupmean` to rename the centered variables.\" # nolint\n      ))\n    }\n    out <- cbind(original_data, out)\n  }\n\n  out\n}\n\n\n#' @rdname demean\n#' @export\ndetrend <- degroup\n"
  },
  {
    "path": "R/describe_distribution.R",
    "content": "#' Describe a distribution\n#'\n#' This function describes a distribution by a set of indices (e.g., measures of\n#' centrality, dispersion, range, skewness, (excess) kurtosis).\n#'\n#' @param x A numeric vector, a character vector, a data frame, or a list. See\n#' `Details`.\n#' @param by Column names indicating how to split the data in various groups\n#' before describing the distribution. `by` groups will be added to potentially\n#' existing groups created by `data_group()`.\n#' @param range Return the range (min and max).\n#' @param quartiles Return the first and third quartiles (25th and 75th\n#'   percentiles).\n#' @param include_factors Logical, if `TRUE`, factors are included in the\n#'   output, however, only columns for range (first and last factor levels) as\n#'   well as n and missing will contain information.\n#' @param ci Confidence Interval (CI) level. Default is `NULL`, i.e. no\n#'   confidence intervals are computed. If not `NULL`, confidence intervals are\n#'   based on bootstrap replicates (see `iterations`).\n#' @param iterations The number of bootstrap replicates for computing confidence\n#'   intervals. Only applies when `ci` is not `NULL`. Defaults to `100`. For\n#'   more stable results, increase the number of `iterations`, but note that this\n#'   can also increase the computation time significantly.\n#' @param iqr Logical, if `TRUE`, the interquartile range is calculated (based\n#'   on [stats::IQR()], using `type = 6`).\n#' @param verbose Show or silence warnings and messages.\n#' @inheritParams bayestestR::point_estimate\n#' @inheritParams extract_column_names\n#'\n#' @details If `x` is a data frame, only numeric variables are kept and will be\n#' displayed in the summary by default.\n#'\n#' If `x` is a list, the behavior is different whether `x` is a stored list. 
If\n#' `x` is stored (for example, `describe_distribution(mylist)` where `mylist`\n#' was created before), artificial variable names are used in the summary\n#' (`Var_1`, `Var_2`, etc.). If `x` is an unstored list (for example,\n#' `describe_distribution(list(mtcars$mpg))`), then `\"mtcars$mpg\"` is used as\n#' variable name.\n#'\n#' @note There is also a\n#'   [`plot()`-method](https://easystats.github.io/see/articles/parameters.html)\n#'   implemented in the [**see**-package](https://easystats.github.io/see/).\n#'\n#' @seealso [kurtosis()] to compute kurtosis (recognized as excess kurtosis).\n#'\n#' @return A data frame with columns that describe the properties of the variables.\n#'\n#' @examplesIf require(\"bayestestR\", quietly = TRUE)\n#' describe_distribution(rnorm(100))\n#'\n#' data(iris)\n#' describe_distribution(iris)\n#' describe_distribution(iris, include_factors = TRUE, quartiles = TRUE)\n#' describe_distribution(list(mtcars$mpg, mtcars$cyl))\n#' @export\ndescribe_distribution <- function(x, ...) {\n  UseMethod(\"describe_distribution\")\n}\n\n\n#' @export\ndescribe_distribution.default <- function(x, verbose = TRUE, ...) 
{\n  if (verbose) {\n    insight::format_warning(\n      paste0(\"Can't describe variables of class `\", class(x)[1], \"`.\")\n    )\n  }\n  NULL\n}\n\n\n#' @export\ndescribe_distribution.list <- function(\n  x,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  ci = NULL,\n  include_factors = FALSE,\n  iterations = 100,\n  threshold = 0.1,\n  verbose = TRUE,\n  ...\n) {\n  factor_el <- which(vapply(x, is.factor, FUN.VALUE = logical(1L)))\n  num_el <- which(vapply(x, is.numeric, FUN.VALUE = logical(1L)))\n\n  # get elements names as is\n  # ex: `list(mtcars$mpg, mtcars$cyl) -> c(\"mtcars$mpg\", \"mtcars$cyl\")`\n  nm <- vapply(\n    sys.call()[[2]],\n    insight::safe_deparse,\n    FUN.VALUE = character(1L)\n  )[-1]\n\n  if (isTRUE(include_factors)) {\n    x <- x[c(num_el, factor_el)]\n    if (length(nm) != 0) {\n      nm <- nm[c(num_el, factor_el)]\n    }\n  } else {\n    x <- x[num_el]\n    if (length(nm) != 0) {\n      nm <- nm[num_el]\n    }\n  }\n\n  # Not possible to obtain elements names if they are stored in\n  # an object\n  if (length(nm) == 0) {\n    nm <- paste0(\"Var_\", seq_along(x))\n  }\n\n  # The function currently doesn't support descriptive summaries for character\n  # or factor types.\n  out <- do.call(\n    rbind,\n    lapply(x, function(i) {\n      if (\n        (include_factors && is.factor(i)) || (!is.character(i) && !is.factor(i))\n      ) {\n        describe_distribution(\n          i,\n          centrality = centrality,\n          dispersion = dispersion,\n          iqr = iqr,\n          range = range,\n          quartiles = quartiles,\n          ci = ci,\n          iterations = iterations,\n          threshold = threshold,\n          verbose = verbose\n        )\n      }\n    })\n  )\n\n  if (is.null(names(x))) {\n    new_names <- nm\n  } else {\n    empty_names <- which(!nzchar(names(x), keepNA = TRUE))\n    new_names <- names(x)\n    new_names[empty_names] <- nm[empty_names]\n  
}\n\n  out$Variable <- new_names\n  row.names(out) <- NULL\n  out <- out[c(\"Variable\", setdiff(colnames(out), \"Variable\"))]\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n    \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"object_name\") <- deparse(substitute(x), width.cutoff = 500)\n  attr(out, \"ci\") <- ci\n  attr(out, \"centrality\") <- centrality\n  attr(out, \"threshold\") <- threshold\n  out\n}\n\n\n#' @rdname describe_distribution\n#' @export\ndescribe_distribution.numeric <- function(\n  x,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  ci = NULL,\n  iterations = 100,\n  threshold = 0.1,\n  verbose = TRUE,\n  ...\n) {\n  insight::check_if_installed(\"bayestestR\")\n  out <- data.frame(.temp = 0)\n\n  # Missing\n  n_missing <- sum(is.na(x))\n  x <- stats::na.omit(x)\n\n  # Point estimates\n  out <- cbind(\n    out,\n    bayestestR::point_estimate(\n      x,\n      centrality = centrality,\n      dispersion = dispersion,\n      threshold = threshold,\n      verbose = verbose,\n      ...\n    )\n  )\n\n  # interquartile range, type same as minitab and SPSS\n  if (iqr) {\n    out$IQR <- stats::IQR(x, na.rm = TRUE, type = 6)\n  }\n\n  # Confidence Intervals\n  if (!is.null(ci)) {\n    insight::check_if_installed(\"boot\")\n    # tell user about bootstrapping and appropriate number of iterations.\n    # \"show_iterations_msg\" is an undocumented argument that is only passed\n    # internally to this function to avoid multiple repeated messages\n    if (!isFALSE(list(...)$show_iterations_msg)) {\n      .show_iterations_warning(verbose, iterations, ci)\n    }\n    # calculate CI for each centrality\n    for (cntr in .centrality_options(centrality)) {\n      results <- tryCatch(\n        {\n          boot::boot(\n            data = x,\n            statistic = .boot_distribution,\n            R = iterations,\n            centrality = cntr\n          )\n        },\n   
     error = function(e) {\n          msg <- conditionMessage(e)\n          if (!is.null(msg) && msg == \"sample is too sparse to find TD\") {\n            insight::format_warning(\n              \"When bootstrapping CIs, sample was too sparse to find TD. Returning NA for CIs.\"\n            )\n            list(t = c(NA_real_, NA_real_))\n          }\n        }\n      )\n      out_ci <- bayestestR::ci(results$t, ci = ci, verbose = FALSE)\n      ci_data <- data.frame(out_ci$CI_low[1], out_ci$CI_high[1])\n      colnames(ci_data) <- c(paste0(\"CI_low_\", cntr), paste0(\"CI_high_\", cntr))\n      out <- cbind(out, ci_data)\n    }\n  }\n\n  # Range\n  if (range) {\n    out <- cbind(\n      out,\n      data.frame(\n        Min = min(x, na.rm = TRUE),\n        Max = max(x, na.rm = TRUE)\n      )\n    )\n  }\n\n  # Quartiles\n  if (quartiles) {\n    out <- cbind(\n      out,\n      data.frame(\n        Q1 = stats::quantile(x, probs = 0.25, na.rm = TRUE),\n        Q3 = stats::quantile(x, probs = 0.75, na.rm = TRUE)\n      )\n    )\n  }\n\n  # Skewness\n  out <- cbind(\n    out,\n    data.frame(\n      Skewness = as.numeric(skewness(x, verbose = verbose)),\n      Kurtosis = as.numeric(kurtosis(x, verbose = verbose))\n    )\n  )\n\n  out$n <- length(x)\n  out$n_Missing <- n_missing\n  out$.temp <- NULL\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n    \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"data\") <- x\n  attr(out, \"ci\") <- ci\n  attr(out, \"centrality\") <- centrality\n  attr(out, \"threshold\") <- threshold\n  out\n}\n\n\n#' @rdname describe_distribution\n#' @export\ndescribe_distribution.factor <- function(\n  x,\n  dispersion = TRUE,\n  range = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # Missing\n  n_missing <- sum(is.na(x))\n  x <- stats::na.omit(x)\n\n  out <- data.frame(\n    Mean = NA,\n    SD = NA,\n    CI_low = NA,\n    CI_high = NA,\n    IQR = NA,\n    Min = levels(x)[1],\n    Max = levels(x)[nlevels(x)],\n    Q1 = 
NA,\n    Q3 = NA,\n    Skewness = as.numeric(skewness(x, verbose = verbose)),\n    Kurtosis = as.numeric(kurtosis(x, verbose = verbose)),\n    n = length(x),\n    n_Missing = n_missing,\n    stringsAsFactors = FALSE\n  )\n\n  if (!dispersion) {\n    out$SD <- NULL\n  }\n\n  dot_args <- list(...)\n\n  if (is.null(dot_args[[\"ci\"]])) {\n    out$CI_low <- NULL\n    out$CI_high <- NULL\n  }\n\n  if (is.null(dot_args[[\"iqr\"]]) || isFALSE(dot_args[[\"iqr\"]])) {\n    out$IQR <- NULL\n  }\n\n  if (is.null(dot_args[[\"quartiles\"]]) || isFALSE(dot_args[[\"quartiles\"]])) {\n    out$Q1 <- NULL\n    out$Q3 <- NULL\n  }\n\n  if (!range) {\n    out$Min <- NULL\n    out$Max <- NULL\n  }\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n    \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"data\") <- x\n  out\n}\n\n\n#' @export\ndescribe_distribution.character <- function(\n  x,\n  dispersion = TRUE,\n  range = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # Missing\n  n_missing <- sum(is.na(x))\n  x <- stats::na.omit(x)\n  values <- unique(x)\n\n  out <- data.frame(\n    Mean = NA,\n    SD = NA,\n    IQR = NA,\n    CI_low = NA,\n    CI_high = NA,\n    Min = values[1],\n    Max = values[length(values)],\n    Q1 = NA,\n    Q3 = NA,\n    Skewness = as.numeric(skewness(x, verbose = verbose)),\n    Kurtosis = as.numeric(kurtosis(x, verbose = verbose)),\n    n = length(x),\n    n_Missing = n_missing,\n    stringsAsFactors = FALSE\n  )\n\n  if (!dispersion) {\n    out$SD <- NULL\n  }\n\n  dot_args <- list(...)\n  if (is.null(dot_args[[\"ci\"]])) {\n    out$CI_low <- NULL\n    out$CI_high <- NULL\n  }\n\n  if (is.null(dot_args[[\"iqr\"]]) || isFALSE(dot_args[[\"iqr\"]])) {\n    out$IQR <- NULL\n  }\n\n  if (is.null(dot_args[[\"quartiles\"]]) || isFALSE(dot_args[[\"quartiles\"]])) {\n    out$Q1 <- NULL\n    out$Q3 <- NULL\n  }\n\n  if (!range) {\n    out$Min <- NULL\n    out$Max <- NULL\n  }\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n  
  \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"data\") <- x\n  out\n}\n\n\n#' @rdname describe_distribution\n#' @export\ndescribe_distribution.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  include_factors = FALSE,\n  ci = NULL,\n  iterations = 100,\n  threshold = 0.1,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  by = NULL,\n  ...\n) {\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # check for reserved variable names\n  .check_for_reserved_names(select)\n\n  # tell user about bootstrapping and appropriate number of iterations\n  .show_iterations_warning(verbose, iterations, ci)\n\n  if (!is.null(by)) {\n    if (!is.character(by)) {\n      insight::format_error(\"`by` must be a character vector.\")\n    }\n    x <- data_group(x, by)\n    out <- describe_distribution(\n      x,\n      select = select,\n      exclude = exclude,\n      centrality = centrality,\n      dispersion = dispersion,\n      iqr = iqr,\n      range = range,\n      quartiles = quartiles,\n      include_factors = include_factors,\n      ci = ci,\n      iterations = iterations,\n      threshold = threshold,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose\n    )\n    out <- data_ungroup(out)\n    return(out)\n  }\n\n  # The function currently doesn't support descriptive summaries for character\n  # or factor types.\n  out <- do.call(\n    rbind,\n    lapply(x[select], function(i) {\n      if (\n        (include_factors && is.factor(i)) || (!is.character(i) && !is.factor(i))\n      ) {\n        describe_distribution(\n          i,\n          centrality = centrality,\n          dispersion = dispersion,\n          iqr = iqr,\n          range = range,\n          quartiles = quartiles,\n          ci = ci,\n          
iterations = iterations,\n          threshold = threshold,\n          verbose = verbose,\n          show_iterations_msg = FALSE\n        )\n      }\n    })\n  )\n\n  if (is.null(out)) {\n    return(NULL)\n  }\n\n  out$Variable <- row.names(out)\n  row.names(out) <- NULL\n  out <- out[c(\"Variable\", setdiff(colnames(out), \"Variable\"))]\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n    \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"object_name\") <- deparse(substitute(x), width.cutoff = 500)\n  attr(out, \"ci\") <- ci\n  attr(out, \"centrality\") <- centrality\n  attr(out, \"threshold\") <- threshold\n  out\n}\n\n\n#' @export\ndescribe_distribution.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  include_factors = FALSE,\n  ci = NULL,\n  iterations = 100,\n  threshold = 0.1,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  by = NULL,\n  ...\n) {\n  if (!is.null(by)) {\n    if (!is.character(by)) {\n      insight::format_error(\"`by` must be a character vector.\")\n    }\n    existing_grps <- setdiff(colnames(attributes(x)$groups), \".rows\")\n    x <- data_group(x, c(existing_grps, by))\n  }\n  group_vars <- setdiff(colnames(attributes(x)$groups), \".rows\")\n  group_data <- expand.grid(lapply(x[group_vars], function(i) unique(sort(i))))\n  groups <- split(x, x[group_vars])\n  groups <- Filter(function(x) nrow(x) > 0, groups)\n\n  # check for reserved variable names\n  .check_for_reserved_names(group_vars, type = \"group_vars\")\n\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # tell user about bootstrapping and appropriate number of iterations\n  .show_iterations_warning(verbose, iterations, ci)\n\n  out <- do.call(\n    rbind,\n    lapply(seq_along(groups), function(i) {\n      d <- 
describe_distribution.data.frame(\n        groups[[i]][select],\n        centrality = centrality,\n        dispersion = dispersion,\n        iqr = iqr,\n        range = range,\n        quartiles = quartiles,\n        include_factors = include_factors,\n        ci = ci,\n        iterations = iterations,\n        threshold = threshold,\n        verbose = verbose,\n        show_iterations_msg = FALSE,\n        ...\n      )\n\n      for (grp in seq_along(group_vars)) {\n        d[[group_vars[grp]]] <- group_data[i, grp]\n      }\n      d <- data_relocate(d, group_vars, before = 1)\n\n      d\n    })\n  )\n\n  class(out) <- unique(c(\n    \"parameters_distribution\",\n    \"see_parameters_distribution\",\n    class(out)\n  ))\n  attr(out, \"object_name\") <- deparse(substitute(x), width.cutoff = 500)\n  attr(out, \"ci\") <- ci\n  attr(out, \"centrality\") <- centrality\n  attr(out, \"threshold\") <- threshold\n  out\n}\n\n\n# methods ------------------\n\n#' @export\nprint.parameters_distribution <- function(x, digits = 2, ...) 
{\n  formatted_table <- format(\n    x,\n    digits = digits,\n    format = \"text\",\n    ci_width = NULL,\n    ci_brackets = TRUE,\n    ...\n  )\n  cat(insight::export_table(\n    formatted_table,\n    format = \"text\",\n    digits = digits,\n    ...\n  ))\n  invisible(x)\n}\n\n\n#' @export\nprint_md.parameters_distribution <- function(\n  x,\n  digits = 2,\n  ci_brackets = c(\"(\", \")\"),\n  ...\n) {\n  formatted_table <- format(\n    x = x,\n    digits = digits,\n    format = \"markdown\",\n    ci_width = NULL,\n    ci_brackets = ci_brackets,\n    ...\n  )\n\n  insight::export_table(\n    formatted_table,\n    format = \"markdown\",\n    align = \"firstleft\",\n    ...\n  )\n}\n\n\n#' @export\nprint_html.parameters_distribution <- function(\n  x,\n  digits = 2,\n  ci_brackets = c(\"(\", \")\"),\n  ...\n) {\n  formatted_table <- format(\n    x = x,\n    digits = digits,\n    format = \"html\",\n    ci_width = NULL,\n    ci_brackets = ci_brackets,\n    ...\n  )\n\n  # determine backend\n  backend <- .check_format_backend(...)\n\n  # pass arguments to export_table\n  fun_args <- list(\n    formatted_table,\n    format = backend,\n    ...\n  )\n\n  # no \"align\" for format \"tt\" - this currently gives an error. Not sure\n  # if related to insight::export_table or tinytable\n  if (identical(backend, \"html\")) {\n    fun_args$align <- \"firstleft\"\n  }\n\n  do.call(insight::export_table, fun_args)\n}\n\n\n#' @export\ndisplay.parameters_distribution <- function(\n  object,\n  format = \"markdown\",\n  digits = 2,\n  ...\n) {\n  format <- .display_default_format(format)\n\n  fun_args <- list(\n    x = object,\n    digits = digits,\n    ...\n  )\n\n  # print table in HTML or markdown format\n  if (format %in% c(\"html\", \"tt\")) {\n    fun_args$backend <- format\n    do.call(print_html, fun_args)\n  } else {\n    do.call(print_md, fun_args)\n  }\n}\n\n\n#' @export\nplot.parameters_distribution <- function(x, ...) 
{\n  insight::check_if_installed(\"see\")\n  NextMethod()\n}\n\n\n# bootstrapping CIs ----------------------------------\n\n.boot_distribution <- function(data, indices, centrality) {\n  out <- datawizard::describe_distribution(\n    data[indices],\n    centrality = centrality,\n    dispersion = FALSE,\n    iqr = FALSE,\n    range = FALSE,\n    quartiles = FALSE,\n    ci = NULL,\n    verbose = FALSE\n  )\n  out[[1]]\n}\n\n\n# check centrality options ----------------------------------------\n\n.centrality_options <- function(centrality) {\n  if (identical(centrality, \"all\")) {\n    c(\"mean\", \"median\", \"MAP\")\n  } else {\n    centrality\n  }\n}\n\n\n# sanity check ----------------------------------------\n\n.check_for_reserved_names <- function(x, type = \"select\") {\n  reserved_names <- c(\n    \"Variable\",\n    \"CI_low\",\n    \"CI_high\",\n    \"n_Missing\",\n    \"Q1\",\n    \"Q3\",\n    \"Quartiles\",\n    \"Min\",\n    \"Max\",\n    \"Range\",\n    \"Trimmed_Mean\",\n    \"Trimmed\",\n    \"Mean\",\n    \"SD\",\n    \"IQR\",\n    \"Skewness\",\n    \"Kurtosis\",\n    \"n\",\n    \"Median\",\n    \"MAD\",\n    \"MAP\",\n    \"IQR\",\n    \"n_Missing\"\n  )\n  invalid_names <- intersect(reserved_names, x)\n\n  if (length(invalid_names) > 0) {\n    # adapt message to show user whether wrong variables appear in grouping or select\n    msg <- switch(\n      type,\n      select = \"with `describe_distribution()`: \",\n      \"as grouping variables in `describe_distribution()`: \"\n    )\n    insight::format_error(paste0(\n      \"Following variable names are reserved and cannot be used \",\n      msg,\n      text_concatenate(invalid_names, enclose = \"`\"),\n      \". 
Please rename these variables in your data.\"\n    ))\n  }\n}\n\n\n.show_iterations_warning <- function(verbose, iterations = 100, ci = NULL) {\n  if (verbose && !is.null(ci)) {\n    msg <- paste(\n      \"Bootstrapping confidence intervals using\",\n      iterations,\n      \"iterations, please be patient...\"\n    )\n    if (iterations < 200) {\n      msg <- c(\n        msg,\n        \"For more stable intervals, increase the number of `iterations`, but note that this can also increase the computation time significantly.\"\n      ) # nolint\n    }\n    insight::format_alert(msg)\n  }\n}\n"
  },
  {
    "path": "R/descriptives.R",
    "content": "# distribution_mode ----------------------------------\n\n#' Compute mode for a statistical distribution\n#'\n#' @param x An atomic vector, a list, or a data frame.\n#'\n#' @return\n#'\n#' The value that appears most frequently in the provided data.\n#' The returned data structure will be the same as the entered one.\n#'\n#' @seealso For continuous variables, the\n#'   **Highest Maximum a Posteriori probability estimate (MAP)** may be\n#'   a more useful way to estimate the most commonly-observed value\n#'   than the mode. See [bayestestR::map_estimate()].\n#'\n#' @examples\n#'\n#' distribution_mode(c(1, 2, 3, 3, 4, 5))\n#' distribution_mode(c(1.5, 2.3, 3.7, 3.7, 4.0, 5))\n#'\n#' @export\ndistribution_mode <- function(x) {\n  # TODO: Add support for weights, trim, binned (method)\n  uniqv <- unique(x)\n  tab <- tabulate(match(x, uniqv))\n  idx <- which.max(tab)\n  uniqv[idx]\n}\n\n#' Compute the coefficient of variation\n#'\n#' Compute the coefficient of variation (CV, ratio of the standard deviation to\n#' the mean, \\eqn{\\sigma/\\mu}) for a set of numeric values.\n#'\n#' @return The computed coefficient of variation for `x`.\n#' @export\n#'\n#' @examples\n#' coef_var(1:10)\n#' coef_var(c(1:10, 100), method = \"median_mad\")\n#' coef_var(c(1:10, 100), method = \"qcd\")\n#' coef_var(mu = 10, sigma = 20)\n#' coef_var(mu = 10, sigma = 20, method = \"unbiased\", n = 30)\ncoef_var <- function(x, ...) {\n  UseMethod(\"coef_var\")\n}\n#' @name distribution_cv\n#' @rdname coef_var\n#' @export\ndistribution_coef_var <- coef_var\n\n#' @export\ncoef_var.default <- function(x, verbose = TRUE, ...) 
{\n  if (verbose) {\n    insight::format_warning(\n      paste0(\n        \"Can't compute the coefficient of variation objects of class `\",\n        class(x)[1],\n        \"`.\"\n      )\n    )\n  }\n  NULL\n}\n\n#' @param x A numeric vector of ratio scale (see details), or vector of values than can be coerced to one.\n#' @param mu A numeric vector of mean values to use to compute the coefficient\n#'   of variation. If supplied, `x` is not used to compute the mean.\n#' @param sigma A numeric vector of standard deviation values to use to compute the coefficient\n#'   of variation. If supplied, `x` is not used to compute the SD.\n#' @param method Method to use to compute the CV. Can be `\"standard\"` to compute\n#'   by dividing the standard deviation by the mean, `\"unbiased\"` for the\n#'   unbiased estimator for normally distributed data, or one of two robust\n#'   alternatives: `\"median_mad\"` to divide the median by the [stats::mad()],\n#'   or `\"qcd\"` (quartile coefficient of dispersion, interquartile range divided\n#'   by the sum of the quartiles \\[twice the midhinge\\]: \\eqn{(Q_3 - Q_1)/(Q_3 + Q_1)}.\n#' @param trim the fraction (0 to 0.5) of values to be trimmed from\n#'   each end of `x` before the mean and standard deviation (or other measures)\n#'   are computed. Values of `trim` outside the range of (0 to 0.5) are taken\n#'   as the nearest endpoint.\n#' @param remove_na Logical. Should `NA` values be removed before computing (`TRUE`)\n#'   or not (`FALSE`, default)?\n#' @param n If `method = \"unbiased\"` and both `mu` and `sigma` are provided (not\n#'   computed from `x`), what sample size to use to adjust the computed CV\n#'   for small-sample bias?\n#' @param ... 
Further arguments passed to computation functions.\n#'\n#' @details\n#' CV is only applicable to values taken on a ratio scale: values that have a\n#' *fixed* meaningfully defined 0 (which is either the lowest or highest\n#' possible value), and that ratios between them are interpretable. For example,\n#' how many sandwiches have I eaten this week? 0 means \"none\" and 20 sandwiches\n#' is 4 times more than 5 sandwiches. If I were to center the number of\n#' sandwiches, it will no longer be on a ratio scale (0 is no longer \"none\", it\n#' is the mean, and the ratio between 4 and -2 is not meaningful). Scaling a ratio\n#' scale still results in a ratio scale. So I can redefine \"how many half\n#' sandwiches did I eat this week\" ( = sandwiches * 0.5) and 0 would still mean\n#' \"none\", and 20 half-sandwiches is still 4 times more than 5 half-sandwiches.\n#'\n#' This means that CV is **NOT** invariant to shifting, but it is to scaling:\n#'\n#' ```{r}\n#' sandwiches <- c(0, 4, 15, 0, 0, 5, 2, 7)\n#' coef_var(sandwiches)\n#'\n#' coef_var(sandwiches / 2) # same\n#'\n#' coef_var(sandwiches + 4) # different! 
0 is no longer meaningful!\n#' ```\n#'\n#' @rdname coef_var\n#'\n#' @export\ncoef_var.numeric <- function(\n  x,\n  mu = NULL,\n  sigma = NULL,\n  method = c(\"standard\", \"unbiased\", \"median_mad\", \"qcd\"),\n  trim = 0,\n  remove_na = FALSE,\n  n = NULL,\n  ...\n) {\n  # TODO: Support weights\n  if (!missing(x) && all(c(-1, 1) %in% sign(x))) {\n    insight::format_error(\n      \"Coefficient of variation only applicable for ratio scale variables.\"\n    )\n  }\n  method <- match.arg(\n    method,\n    choices = c(\"standard\", \"unbiased\", \"median_mad\", \"qcd\")\n  )\n  if (is.null(mu) || is.null(sigma)) {\n    if (isTRUE(remove_na)) {\n      x <- .drop_na(x)\n    }\n    n <- length(x)\n    x <- .trim_values(x, trim = trim, n = n)\n  }\n  if (is.null(mu)) {\n    mu <- switch(\n      method,\n      standard = ,\n      unbiased = mean(x, ...),\n      median_mad = stats::median(x, ...),\n      qcd = unname(sum(stats::quantile(x, probs = c(0.25, 0.75), ...)))\n    )\n  }\n  if (is.null(sigma)) {\n    sigma <- switch(\n      method,\n      standard = ,\n      unbiased = stats::sd(x, ...),\n      median_mad = stats::mad(x, center = mu, ...),\n      qcd = unname(diff(stats::quantile(x, probs = c(0.25, 0.75), ...)))\n    )\n  }\n  out <- sigma / mu\n  if (method == \"unbiased\") {\n    if (is.null(n)) {\n      insight::format_error(\n        \"A value for `n` must be provided when `method = \\\"unbiased\\\"` and both `mu` and `sigma` are provided.\"\n      )\n    }\n    # from DescTools::CoefVar\n    out <- out * (1 - 1 / (4 * (n - 1)) + 1 / n * out^2 + 1 / (2 * (n - 1)^2))\n  }\n  out\n}\n\n\n# descriptives helpers\n\n.drop_na <- function(x) {\n  x[!is.na(x)]\n}\n\n.trim_values <- function(x, trim = 0, n = NULL, weights = NULL) {\n  # TODO: Support weights\n  if (!is.numeric(trim) || length(trim) != 1L) {\n    insight::format_error(\"`trim` must be a single numeric value.\")\n  }\n  if (is.null(n)) {\n    n <- length(x)\n  }\n  if (trim > 0 && n) {\n    if 
(anyNA(x)) {\n      return(NA_real_)\n    }\n    if (trim >= 0.5) {\n      return(stats::median(x, na.rm = FALSE))\n    }\n    lo <- floor(n * trim) + 1\n    hi <- n + 1 - lo\n    x <- sort.int(x, partial = unique(c(lo, hi)))[lo:hi]\n  }\n  x\n}\n"
  },
  {
    "path": "R/extract_column_names.R",
    "content": "#' @title Find or get columns in a data frame based on search patterns\n#' @name extract_column_names\n#'\n#' @description `extract_column_names()` returns column names from a data set that\n#' match a certain search pattern, while `data_select()` returns the found data.\n#'\n#' @param data A data frame.\n#' @param select Variables that will be included when performing the required\n#'   tasks. Can be either\n#'\n#'   - a variable specified as a literal variable name (e.g., `column_name`),\n#'   - a string with the variable name (e.g., `\"column_name\"`), a character\n#'     vector of variable names (e.g., `c(\"col1\", \"col2\", \"col3\")`), or a\n#'     character vector of variable names including ranges specified via `:`\n#'     (e.g., `c(\"col1:col3\", \"col5\")`),\n#'   - for some functions, like `data_select()` or `data_rename()`, `select` can\n#'     be a named character vector. In this case, the names are used to rename\n#'     the columns in the output data frame. See 'Details' in the related\n#'     functions to see where this option applies.\n#'   - a formula with variable names (e.g., `~column_1 + column_2`),\n#'   - a vector of positive integers, giving the positions counting from the left\n#'     (e.g. `1` or `c(1, 3, 5)`),\n#'   - a vector of negative integers, giving the positions counting from the\n#'     right (e.g., `-1` or `-1:-3`),\n#'   - one of the following select-helpers: `starts_with()`, `ends_with()`,\n#'     `contains()`, a range using `:`, or `regex()`. `starts_with()`,\n#'     `ends_with()`, and  `contains()` accept several patterns, e.g\n#'     `starts_with(\"Sep\", \"Petal\")`. `regex()` can be used to define regular\n#'     expression patterns.\n#'   - a function testing for logical conditions, e.g. 
`is.numeric()` (or\n#'     `is.numeric`), or any user-defined function that selects the variables\n#'     for which the function returns `TRUE` (like: `foo <- function(x) mean(x) > 3`),\n#'   - ranges specified via literal variable names, select-helpers (except\n#'     `regex()`) and (user-defined) functions can be negated, i.e. return\n#'     non-matching elements, when prefixed with a `-`, e.g. `-ends_with()`,\n#'     `-is.numeric` or `-(Sepal.Width:Petal.Length)`. **Note:** Negation means\n#'     that matches are _excluded_, and thus, the `exclude` argument can be\n#'     used alternatively. For instance, `select=-ends_with(\"Length\")` (with\n#'     `-`) is equivalent to `exclude=ends_with(\"Length\")` (no `-`). In case\n#'     negation should not work as expected, use the `exclude` argument instead.\n#'\n#'   If `NULL`, selects all columns. Patterns that found no matches are silently\n#'   ignored, e.g. `extract_column_names(iris, select = c(\"Species\", \"Test\"))`\n#'   will just return `\"Species\"`.\n#' @param exclude See `select`, however, column names matched by the pattern\n#'   from `exclude` will be excluded instead of selected. If `NULL` (the default),\n#'   excludes no columns.\n#' @param ignore_case Logical, if `TRUE` and when one of the select-helpers or\n#'   a regular expression is used in `select`, ignores lower/upper case in the\n#'   search pattern when matching against variable names.\n#' @param regex Logical, if `TRUE`, the search pattern from `select` will be\n#'   treated as regular expression. When `regex = TRUE`, select *must* be a\n#'   character string (or a variable containing a character string) and is not\n#'   allowed to be one of the supported select-helpers or a character vector\n#'   of length > 1. 
`regex = TRUE` is comparable to using one of the two\n#'   select-helpers, `select = contains()` or `select = regex()`, however,\n#'   since the select-helpers may not work when called from inside other\n#'   functions (see 'Details'), this argument may be used as workaround.\n#' @param verbose Toggle warnings.\n#' @param ... Arguments passed down to other functions. Mostly not used yet.\n#'\n#' @inherit data_rename seealso\n#'\n#' @return\n#'\n#' `extract_column_names()` returns a character vector with column names that\n#' matched the pattern in `select` and `exclude`, or `NULL` if no matching\n#' column name was found. `data_select()` returns a data frame with matching\n#' columns.\n#'\n#' @details\n#'\n#' Specifically for `data_select()`, `select` can also be a named character\n#' vector. In this case, the names are used to rename the columns in the\n#' output data frame. See 'Examples'.\n#'\n#' Note that it is possible to either pass an entire select helper or only the\n#' pattern inside a select helper as a function argument:\n#'\n#' ```r\n#' foo <- function(data, pattern) {\n#'   extract_column_names(data, select = starts_with(pattern))\n#' }\n#' foo(iris, pattern = \"Sep\")\n#'\n#' foo2 <- function(data, pattern) {\n#'   extract_column_names(data, select = pattern)\n#' }\n#' foo2(iris, pattern = starts_with(\"Sep\"))\n#' ```\n#'\n#' This means that it is also possible to use loop values as arguments or patterns:\n#'\n#' ```r\n#' for (i in c(\"Sepal\", \"Sp\")) {\n#'   head(iris) |>\n#'     extract_column_names(select = starts_with(i)) |>\n#'     print()\n#' }\n#' ```\n#'\n#' However, this behavior is limited to a \"single-level function\". 
It will not\n#' work in nested functions, like below:\n#'\n#' ```r\n#' inner <- function(data, arg) {\n#'   extract_column_names(data, select = arg)\n#' }\n#' outer <- function(data, arg) {\n#'   inner(data, starts_with(arg))\n#' }\n#' outer(iris, \"Sep\")\n#' ```\n#'\n#' In this case, it is better to pass the whole select helper as the argument of\n#' `outer()`:\n#'\n#' ```r\n#' outer <- function(data, arg) {\n#'   inner(data, arg)\n#' }\n#' outer(iris, starts_with(\"Sep\"))\n#' ```\n#'\n#' @examples\n#' # Find column names by pattern\n#' extract_column_names(iris, starts_with(\"Sepal\"))\n#' extract_column_names(iris, ends_with(\"Width\"))\n#' extract_column_names(iris, regex(\"\\\\.\"))\n#' extract_column_names(iris, c(\"Petal.Width\", \"Sepal.Length\"))\n#'\n#' # starts with \"Sepal\", but not allowed to end with \"width\"\n#' extract_column_names(iris, starts_with(\"Sepal\"), exclude = contains(\"Width\"))\n#'\n#' # find numeric with mean > 3.5\n#' numeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm = TRUE) > 3.5\n#' extract_column_names(iris, numeric_mean_35)\n#'\n#' # find column names, using range\n#' extract_column_names(mtcars, c(cyl:hp, wt))\n#'\n#' # find range of column names by range, using character vector\n#' extract_column_names(mtcars, c(\"cyl:hp\", \"wt\"))\n#'\n#' # rename returned columns for \"data_select()\"\n#' head(data_select(mtcars, c(`Miles per Gallon` = \"mpg\", Cylinders = \"cyl\")))\n#' @export\nextract_column_names <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  columns <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = FALSE\n  )\n\n  if (!length(columns) || is.null(columns)) {\n    columns <- NULL\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"No column names that matched the required search pattern were found.\"\n      )\n    }\n  }\n\n  
columns\n}\n\n#' @rdname extract_column_names\n#' @export\nfind_columns <- extract_column_names\n"
  },
  {
    "path": "R/format.R",
    "content": "# distribution ---------------------------------\n\n#' @export\nformat.parameters_distribution <- function(\n  x,\n  digits = 2,\n  format = NULL,\n  ci_width = \"auto\",\n  ci_brackets = TRUE,\n  ...\n) {\n  # save information\n  att <- attributes(x)\n\n  if (all(c(\"Min\", \"Max\") %in% names(x))) {\n    x$Min <- insight::format_ci(\n      x$Min,\n      x$Max,\n      ci = NULL,\n      digits = digits,\n      width = ci_width,\n      brackets = ci_brackets\n    )\n    x$Max <- NULL\n    colnames(x)[which(colnames(x) == \"Min\")] <- \"Range\"\n  }\n\n  if (all(c(\"Q1\", \"Q3\") %in% names(x))) {\n    x$Q1 <- insight::format_ci(\n      x$Q1,\n      x$Q3,\n      ci = NULL,\n      digits = digits,\n      width = ci_width,\n      brackets = FALSE\n    )\n    x$Q3 <- NULL\n    colnames(x)[which(colnames(x) == \"Q1\")] <- \"Quartiles\"\n  }\n\n  # find CI columns. We might have multiple columns for different centralities\n  ci_columns <- grepl(\"^(CI_low|CI_high)\", colnames(x))\n  # make sure we have matches\n  if (any(ci_columns)) {\n    # iterate all centrality options\n    centrality <- .centrality_options(att$centrality)\n    for (ce in centrality) {\n      # this is the original column name\n      ci_columns <- c(paste0(\"CI_low_\", ce), paste0(\"CI_high_\", ce))\n      # we format CI column, merge it into one column\n      x[[ci_columns[1]]] <- insight::format_ci(\n        x[[ci_columns[1]]],\n        x[[ci_columns[2]]],\n        ci = NULL,\n        digits = digits,\n        width = ci_width,\n        brackets = ci_brackets\n      )\n      # ... 
and remove the no longer needed CI_high column\n      x[[ci_columns[2]]] <- NULL\n      ci_lvl <- attributes(x)$ci\n\n      # find position of CI column\n      ci_columm_pos <- which(colnames(x) == ci_columns[1])\n\n      # rename\n      if (is.null(ci_lvl)) {\n        colnames(x)[ci_columm_pos] <- sprintf(\n          \"CI (%s)\",\n          insight::format_capitalize(ce)\n        )\n      } else {\n        colnames(x)[ci_columm_pos] <- sprintf(\n          \"%i%% CI (%s)\",\n          round(100 * ci_lvl),\n          insight::format_capitalize(ce)\n        )\n      }\n\n      # make sure we have the correct column name of the centrality\n      centr_name <- switch(\n        tolower(ce),\n        mean = \"Mean\",\n        median = \"Median\",\n        map = \"MAP\"\n      )\n\n      # reorder CI column, move it to related centrality index\n      centr_pos <- which(colnames(x) == centr_name)\n      if (length(centr_pos)) {\n        x <- data_relocate(x, select = ci_columm_pos, after = centr_pos)\n      }\n    }\n  }\n\n  if (\"Trimmed_Mean\" %in% colnames(x)) {\n    threshold <- attributes(x)$threshold\n    if (is.null(threshold)) {\n      trim_name <- \"Trimmed\"\n    } else {\n      trim_name <- sprintf(\"Trimmed (%g%%)\", round(100 * threshold))\n    }\n    colnames(x)[which(colnames(x) == \"Trimmed_Mean\")] <- trim_name\n  }\n\n  x\n}\n"
  },
  {
    "path": "R/labels_to_levels.R",
    "content": "#' @title Convert value labels into factor levels\n#' @name labels_to_levels\n#'\n#' @details\n#' `labels_to_levels()` allows to use value labels of factors as their levels.\n#'\n#' @param x A data frame or factor. Other variable types (e.g. numerics) are not\n#' allowed.\n#' @param ... Currently not used.\n#' @inheritParams extract_column_names\n#' @inheritParams categorize\n#'\n#' @return `x`, where for all factors former levels are replaced by their value\n#' labels.\n#'\n#' @examples\n#' data(efc)\n#' # create factor\n#' x <- as.factor(efc$c172code)\n#' # add value labels - these are not factor levels yet\n#' x <- assign_labels(x, values = c(`1` = \"low\", `2` = \"mid\", `3` = \"high\"))\n#' levels(x)\n#' data_tabulate(x)\n#'\n#' x <- labels_to_levels(x)\n#' levels(x)\n#' data_tabulate(x)\n#' @export\nlabels_to_levels <- function(x, ...) {\n  UseMethod(\"labels_to_levels\")\n}\n\n\n#' @export\nlabels_to_levels.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\"`labels_to_levels()` only works for factors.\")\n  }\n  x\n}\n\n#' @rdname labels_to_levels\n#' @export\nlabels_to_levels.factor <- function(x, verbose = TRUE, ...) {\n  if (is.null(attr(x, \"labels\", exact = TRUE))) {\n    insight::format_error(\n      \"Could not change factor levels. 
Variable had no value labels.\"\n    )\n  }\n  .value_labels_to_levels(x, verbose = verbose)\n}\n\n#' @rdname labels_to_levels\n#' @export\nlabels_to_levels.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  append = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # validation check, return as is for complete factor\n  if (all(vapply(x, is.factor, TRUE))) {\n    return(x)\n  }\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # keep only factors\n  select <- colnames(x[select])[vapply(x[select], is.factor, TRUE)]\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    arguments <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_l\",\n      preserve_value_labels = TRUE,\n      keep_factors = TRUE,\n      keep_character = FALSE\n    )\n    # update processed arguments\n    x <- arguments$x\n    select <- arguments$select\n  }\n\n  x[select] <- lapply(\n    x[select],\n    labels_to_levels,\n    verbose = verbose,\n    ...\n  )\n  x\n}\n"
  },
  {
    "path": "R/makepredictcall.R",
    "content": "#' Utility Function for Safe Prediction with `datawizard` transformers\n#'\n#' This function allows for the use of (some of) `datawizard`'s transformers\n#' inside a model formula. See examples below.\n#' \\cr\\cr\n#' Currently, [center()], [standardize()], [normalize()], & [rescale()] are\n#' supported.\n#'\n#' @inheritParams stats::makepredictcall\n#'\n#' @inherit stats::makepredictcall return\n#' @importFrom stats makepredictcall\n#'\n#' @seealso [stats::makepredictcall()]\n#' @family datawizard-transformers\n#'\n#' @examples\n#'\n#' data(\"mtcars\")\n#' train <- mtcars[1:30, ]\n#' test <- mtcars[31:32, ]\n#'\n#' m1 <- lm(mpg ~ center(hp), data = train)\n#' predict(m1, newdata = test) # Data is \"centered\" before the prediction is made,\n#' # according to the center of the old data\n#'\n#' m2 <- lm(mpg ~ standardize(hp), data = train)\n#' m3 <- lm(mpg ~ scale(hp), data = train) # same as above\n#' predict(m2, newdata = test) # Data is \"standardized\" before the prediction is made.\n#' predict(m3, newdata = test) # Data is \"standardized\" before the prediction is made.\n#'\n#'\n#' m4 <- lm(mpg ~ normalize(hp), data = mtcars)\n#' m5 <- lm(mpg ~ rescale(hp, to = c(-3, 3)), data = mtcars)\n#'\n#' (newdata <- data.frame(hp = c(range(mtcars$hp), 400))) # 400 is outside original range!\n#'\n#' model.frame(delete.response(terms(m4)), data = newdata)\n#' model.frame(delete.response(terms(m5)), data = newdata)\n#'\n#' @export\nmakepredictcall.dw_transformer <- function(var, call) {\n  if (is.matrix(var) || is.array(var)) {\n    insight::format_error(\n      \"datawizard scalers in model formulas are not supported for matrices.\"\n    )\n  }\n\n  switch(\n    as.character(call)[1L],\n    centre = ,\n    center = {\n      call$center <- attr(var, \"center\")\n    },\n    standardise = ,\n    standardize = {\n      call$center <- attr(var, \"center\")\n      call$scale <- attr(var, \"scale\")\n    },\n    normalize = ,\n    normalise = {\n      
call$min_value <- attr(var, \"min_value\")\n      call$range_difference <- attr(var, \"range_difference\")\n      call$vector_length <- attr(var, \"vector_length\")\n      call$include_bounds <- attr(var, \"include_bounds\")\n      call$flag_bounds <- attr(var, \"flag_bounds\")\n    },\n    rescale = {\n      call$min_value <- attr(var, \"min_value\")\n      call$max_value <- attr(var, \"max_value\")\n      call$new_min <- attr(var, \"new_min\")\n      call$new_max <- attr(var, \"new_max\")\n    },\n\n    # ELSE:\n    {\n      return(call)\n    }\n  )\n\n  call$verbose <- FALSE\n  call\n}\n"
  },
  {
    "path": "R/mean_sd.R",
    "content": "#' Summary Helpers\n#'\n#' @param x A numeric vector (or one that can be coerced to one via\n#'   `as.numeric()`) to be summarized.\n#' @param named Should the vector be named?\n#'   (E.g., `c(\"-SD\" = -1, Mean = 1, \"+SD\" = 2)`.)\n#' @param times How many SDs above and below the Mean (or MADs around the Median)\n#' @param ... Not used.\n#' @inheritParams coef_var\n#' @inheritParams stats::mad\n#'\n#' @return A (possibly named) numeric vector of length `2*times + 1` of SDs\n#'   below the mean, the mean, and SDs above the mean (or median and MAD).\n#'\n#' @examples\n#' mean_sd(mtcars$mpg)\n#'\n#' mean_sd(mtcars$mpg, times = 2L)\n#'\n#' median_mad(mtcars$mpg)\n#'\n#' @export\nmean_sd <- function(x, times = 1L, remove_na = TRUE, named = TRUE, ...) {\n  .centrality_dispersion(\n    x,\n    type = \"mean\",\n    times = times,\n    remove_na = remove_na,\n    named = named\n  )\n}\n\n#' @export\n#' @rdname mean_sd\nmedian_mad <- function(\n  x,\n  times = 1L,\n  remove_na = TRUE,\n  constant = 1.4826,\n  named = TRUE,\n  ...\n) {\n  .centrality_dispersion(\n    x,\n    type = \"median\",\n    times = times,\n    remove_na = remove_na,\n    constant = constant,\n    named = named\n  )\n}\n\n#' @keywords Internal\n.centrality_dispersion <- function(\n  x,\n  type = \"mean\",\n  remove_na = TRUE,\n  times = 1L,\n  constant = 1.4826,\n  named = TRUE,\n  ...\n) {\n  x <- as.numeric(x)\n  times <- as.integer(times)\n  type <- match.arg(type, choices = c(\"mean\", \"median\"))\n\n  # centrality\n  M <- switch(\n    type,\n    median = stats::median(x, na.rm = remove_na),\n    mean(x, na.rm = remove_na)\n  )\n\n  S <- switch(\n    type,\n    median = stats::mad(x, na.rm = remove_na, constant = constant),\n    stats::sd(x, na.rm = remove_na)\n  )\n\n  v <- M + c(-rev(seq_len(times)), 0, seq_len(times)) * S\n\n  if (isTRUE(named)) {\n    string_cs <- switch(type, median = c(\"Median\", \"MAD\"), c(\"Mean\", \"SD\"))\n    if (times == 1L) {\n      times <- 
\"\"\n    } else {\n      times <- paste0(seq_len(times), \" \")\n    }\n    names(v) <- c(\n      paste0(\"-\", rev(times), string_cs[2]),\n      string_cs[1],\n      paste0(\"+\", times, string_cs[2])\n    )\n  }\n  v\n}\n"
  },
  {
    "path": "R/means_by_group.R",
    "content": "#' @title Summary of mean values by group\n#' @name means_by_group\n#'\n#' @description Computes summary table of means by groups.\n#'\n#' @param x A vector or a data frame.\n#' @param by If `x` is a numeric vector, `by` should be a factor that\n#' indicates the group-classifying categories. If `x` is a data frame, `by`\n#' should be a character string, naming the variable in `x` that is used for\n#' grouping. Numeric vectors are coerced to factors. Not that `by` should\n#' only refer to a single variable.\n#' @param ci Level of confidence interval for mean estimates. Default is `0.95`.\n#' Use `ci = NA` to suppress confidence intervals.\n#' @param weights If `x` is a numeric vector, `weights` should be a vector of\n#' weights that will be applied to weight all observations. If `x` is a data\n#' frame, `weights` can also be a character string indicating the name of the\n#' variable in `x` that should be used for weighting. Default is `NULL`, so no\n#' weights are used.\n#' @param digits Optional scalar, indicating the amount of digits after decimal\n#' point when rounding estimates and values.\n#' @param ... Currently not used\n#' @inheritParams find_columns\n#'\n#' @return A data frame with information on mean and further summary statistics\n#' for each sub-group.\n#'\n#' @details This function is comparable to `aggregate(x, by, mean)`, but provides\n#' some further information, including summary statistics from a One-Way-ANOVA\n#' using `x` as dependent and `by` as independent variable. [`emmeans::contrast()`]\n#' is used to get p-values for each sub-group. 
P-values indicate whether each\n#' group-mean is significantly different from the total mean.\n#'\n#' @examples\n#' data(efc)\n#' means_by_group(efc, \"c12hour\", \"e42dep\")\n#'\n#' data(iris)\n#' means_by_group(iris, \"Sepal.Width\", \"Species\")\n#'\n#' # weighting\n#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n#' means_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\")\n#' @export\nmeans_by_group <- function(x, ...) {\n  UseMethod(\"means_by_group\")\n}\n\n\n#' @export\nmeans_by_group.default <- function(x, ...) {\n  insight::format_error(\n    \"`means_by_group()` does not work for objects of class `\",\n    class(x)[1],\n    \"`.\"\n  )\n}\n\n\n#' @rdname means_by_group\n#' @export\nmeans_by_group.numeric <- function(\n  x,\n  by = NULL,\n  ci = 0.95,\n  weights = NULL,\n  digits = NULL,\n  ...\n) {\n  # validation check for arguments\n\n  # \"by\" must be provided\n  if (is.null(by)) {\n    insight::format_error(\"Argument `by` is missing.\")\n  }\n\n  # by must be of same length as x\n  if (length(by) != length(x)) {\n    insight::format_error(\"Argument `by` must be of same length as `x`.\")\n  }\n\n  # if weights are provided, must be of same length as x\n  if (!is.null(weights) && length(weights) != length(x)) {\n    insight::format_error(\"Argument `weights` must be of same length as `x`.\")\n  }\n\n  # if weights are NULL, set weights to 1\n  if (is.null(weights)) {\n    weights <- rep(1, length(x))\n  }\n\n  # retrieve labels\n  var_mean_label <- attr(x, \"label\", exact = TRUE)\n  var_grp_label <- attr(by, \"label\", exact = TRUE)\n\n  # if no labels present, use variable names directly\n  if (is.null(var_mean_label)) {\n    var_mean_label <- deparse(substitute(x))\n  }\n  if (is.null(var_grp_label)) {\n    var_grp_label <- deparse(substitute(by))\n  }\n\n  # coerce group to factor if numeric, or convert labels to levels, if factor\n  if (is.factor(by)) {\n    by <- tryCatch(labels_to_levels(by, verbose = FALSE), error = 
function(e) {\n      by\n    })\n  } else {\n    by <- to_factor(by)\n  }\n\n  my_data <- stats::na.omit(data.frame(\n    x = x,\n    group = by,\n    weights = weights,\n    stringsAsFactors = FALSE\n  ))\n\n  # get grouped means table\n  out <- .means_by_group(my_data, ci = ci)\n\n  # attributes\n  attr(out, \"var_mean_label\") <- var_mean_label\n  attr(out, \"var_grp_label\") <- var_grp_label\n  attr(out, \"digits\") <- digits\n\n  class(out) <- c(\"dw_groupmeans\", \"data.frame\")\n  out\n}\n\n\n#' @rdname means_by_group\n#' @export\nmeans_by_group.data.frame <- function(\n  x,\n  select = NULL,\n  by = NULL,\n  ci = 0.95,\n  weights = NULL,\n  digits = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  if (is.null(weights)) {\n    w <- NULL\n  } else if (is.character(weights)) {\n    w <- x[[weights]]\n  } else {\n    w <- weights\n  }\n\n  out <- lapply(select, function(i) {\n    # if no labels present, use variable names directy\n    if (is.null(attr(x[[i]], \"label\", exact = TRUE))) {\n      attr(x[[i]], \"label\") <- i\n    }\n    if (is.null(attr(x[[by]], \"label\", exact = TRUE))) {\n      attr(x[[by]], \"label\") <- by\n    }\n    # compute means table\n    means_by_group(\n      x[[i]],\n      by = x[[by]],\n      ci = ci,\n      weights = w,\n      digits = digits,\n      ...\n    )\n  })\n\n  class(out) <- c(\"dw_groupmeans_list\", \"list\")\n  out\n}\n\n\n#' @keywords internal\n.means_by_group <- function(data, ci = 0.95) {\n  # compute anova statistics for mean table\n  if (is.null(data$weights) || all(data$weights == 1)) {\n    fit <- stats::lm(x ~ group, data = data)\n  } else {\n    fit <- stats::lm(x ~ group, weights = data$weights, data = data)\n  }\n\n  # summary table data\n  groups <- split(data$x, data$group)\n  
group_weights <- split(data$weights, data$group)\n  out <- do.call(\n    rbind,\n    Map(\n      function(x, w) {\n        data.frame(\n          Mean = weighted_mean(x, weights = w),\n          SD = weighted_sd(x, weights = w),\n          N = round(sum(w)),\n          stringsAsFactors = FALSE\n        )\n      },\n      groups,\n      group_weights\n    )\n  )\n\n  # add group names\n  out$Category <- levels(data$group)\n  out$p <- out$CI_high <- out$CI_low <- NA\n\n  # p-values of contrast-means\n  if (insight::check_if_installed(\"emmeans\", quietly = TRUE)) {\n    # create summary table of contrasts, for p-values and confidence intervals\n    predicted <- emmeans::emmeans(fit, specs = \"group\", level = ci)\n    emm_contrasts <- emmeans::contrast(predicted, method = \"eff\")\n    # add p-values and confidence intervals to \"out\"\n    if (!is.null(ci) && !is.na(ci)) {\n      summary_table <- as.data.frame(predicted)\n      out$CI_low <- summary_table$lower.CL\n      out$CI_high <- summary_table$upper.CL\n    }\n    summary_table <- as.data.frame(emm_contrasts)\n    out$p <- summary_table$p.value\n  }\n\n  # reorder columns\n  out <- out[c(\"Category\", \"Mean\", \"N\", \"SD\", \"CI_low\", \"CI_high\", \"p\")]\n\n  # finally, add total-row\n  out <- rbind(\n    out,\n    data.frame(\n      Category = \"Total\",\n      Mean = weighted_mean(data$x, weights = data$weights),\n      N = nrow(data),\n      SD = weighted_sd(data$x, weights = data$weights),\n      CI_low = NA,\n      CI_high = NA,\n      p = NA,\n      stringsAsFactors = FALSE\n    )\n  )\n\n  # get anova statistics for mean table\n  sum.fit <- summary(fit)\n\n  # r-squared values\n  r2 <- sum.fit$r.squared\n  r2.adj <- sum.fit$adj.r.squared\n\n  # F-statistics\n  fstat <- sum.fit$fstatistic\n  pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE)\n\n  # copy as attributes\n  attr(out, \"r2\") <- r2\n  attr(out, \"ci\") <- ci\n  attr(out, \"adj.r2\") <- r2.adj\n  attr(out, \"fstat\") <- 
fstat[1]\n  attr(out, \"p.value\") <- pval\n\n  out\n}\n\n\n# methods -----------------\n\n#' @export\nformat.dw_groupmeans <- function(x, digits = NULL, ...) {\n  if (is.null(digits)) {\n    digits <- attr(x, \"digits\", exact = TRUE)\n  }\n  if (is.null(digits)) {\n    digits <- 2\n  }\n  x$N <- insight::format_value(x$N, digits = 0)\n  insight::format_table(remove_empty_columns(x), digits = digits, ...)\n}\n\n#' @export\nprint.dw_groupmeans <- function(x, digits = NULL, ...) {\n  out <- format(x, digits = digits, ...)\n\n  # caption\n  l1 <- attributes(x)$var_mean_label\n  l2 <- attributes(x)$var_grp_label\n  if (!is.null(l1) && !is.null(l2)) {\n    caption <- c(paste0(\"# Mean of \", l1, \" by \", l2), \"blue\")\n  } else {\n    caption <- NULL\n  }\n\n  # footer\n  footer <- paste0(\n    \"\\nAnova: R2=\",\n    insight::format_value(attributes(x)$r2, digits = 3),\n    \"; adj.R2=\",\n    insight::format_value(attributes(x)$adj.r2, digits = 3),\n    \"; F=\",\n    insight::format_value(attributes(x)$fstat, digits = 3),\n    \"; \",\n    insight::format_p(attributes(x)$p.value, whitespace = FALSE),\n    \"\\n\"\n  )\n\n  cat(insight::export_table(out, caption = caption, footer = footer, ...))\n}\n\n#' @export\nprint.dw_groupmeans_list <- function(x, digits = NULL, ...) {\n  for (i in seq_along(x)) {\n    if (i > 1) {\n      cat(\"\\n\")\n    }\n    print(x[[i]], digits = digits, ...)\n  }\n}\n"
  },
  {
    "path": "R/normalize.R",
    "content": "#' Normalize numeric variable to 0-1 range\n#'\n#' Performs a normalization of data, i.e., it scales variables in the range\n#' 0 - 1. This is a special case of [rescale()]. `unnormalize()` is the\n#' counterpart, but only works for variables that have been normalized with\n#' `normalize()`.\n#'\n#' @param x A numeric vector, (grouped) data frame, or matrix. See 'Details'.\n#' @param include_bounds Numeric or logical. Using this can be useful in case of\n#'   beta-regression, where the response variable is not allowed to include\n#'   zeros and ones. If `TRUE`, the input is normalized to a range that includes\n#'   zero and one. If `FALSE`, the return value is compressed, using\n#'   Smithson and Verkuilen's (2006) formula `(x * (n - 1) + 0.5) / n`, to avoid\n#'   zeros and ones in the normalized variables. Else, if numeric (e.g., `0.001`),\n#'   `include_bounds` defines the \"distance\" to the lower and upper bound, i.e.\n#'   the normalized vectors are rescaled to a range from `0 + include_bounds` to\n#'   `1 - include_bounds`.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams standardize.data.frame\n#' @inheritParams extract_column_names\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @details\n#'\n#' - If `x` is a matrix, normalization is performed across all values (not\n#'   column- or row-wise). For column-wise normalization, convert the matrix to a\n#'   data.frame.\n#' - If `x` is a grouped data frame (`grouped_df`), normalization is performed\n#'   separately for each group.\n#'\n#' @seealso See [makepredictcall.dw_transformer()] for use in model formulas.\n#'\n#' @examples\n#'\n#' normalize(c(0, 1, 5, -5, -2))\n#' normalize(c(0, 1, 5, -5, -2), include_bounds = FALSE)\n#' # use a value defining the bounds\n#' normalize(c(0, 1, 5, -5, -2), include_bounds = 0.001)\n#'\n#' head(normalize(trees))\n#'\n#' @references\n#'\n#' Smithson M, Verkuilen J (2006). A Better Lemon Squeezer? 
Maximum-Likelihood\n#' Regression with Beta-Distributed Dependent Variables. Psychological Methods,\n#' 11(1), 54–71.\n#'\n#' @family transform utilities\n#'\n#' @return A normalized object.\n#'\n#' @export\nnormalize <- function(x, ...) {\n  UseMethod(\"normalize\")\n}\n\n\n#' @rdname normalize\n#' @export\nnormalize.numeric <- function(x, include_bounds = TRUE, verbose = TRUE, ...) {\n  # Warning if all NaNs or infinite\n  if (all(is.infinite(x) | is.na(x))) {\n    return(x)\n  }\n\n  # safe name, for later use\n  if (is.null(names(x))) {\n    name <- insight::safe_deparse(substitute(x))\n  } else {\n    name <- names(x)\n  }\n\n  # Get infinite and replace by NA (so that the normalization doesn't fail)\n  infinite_idx <- is.infinite(x)\n  infinite_vals <- x[infinite_idx]\n  x[infinite_idx] <- NA\n\n  # called from \"makepredictcal()\"? Then we have additional arguments\n  dot_args <- list(...)\n  flag_predict <- FALSE\n  required_dot_args <- c(\n    \"range_difference\",\n    \"min_value\",\n    \"vector_length\",\n    \"flag_bounds\"\n  )\n\n  if (all(required_dot_args %in% names(dot_args))) {\n    # we gather informatiom about the original data, which is needed\n    # for \"predict()\" to work properly when \"normalize()\" is called\n    # in formulas on-the-fly, e.g. 
\"lm(mpg ~ normalize(hp), data = mtcars)\"\n    range_difference <- dot_args$range_difference\n    min_value <- dot_args$min_value\n    vector_length <- dot_args$vector_length\n    flag_bounds <- dot_args$flag_bounds\n    flag_predict <- TRUE\n  } else {\n    range_difference <- diff(range(x, na.rm = TRUE))\n    min_value <- min(x, na.rm = TRUE)\n    vector_length <- length(x)\n    flag_bounds <- NULL\n  }\n\n  # Warning if only one value\n  if (!flag_predict && insight::has_single_value(x)) {\n    if (verbose) {\n      insight::format_warning(\n        paste0(\n          \"Variable `\",\n          name,\n          \"` contains only one unique value and will not be normalized.\"\n        )\n      )\n    }\n    return(x)\n  }\n\n  # Warning if logical vector\n  if (insight::n_unique(x) == 2 && verbose) {\n    insight::format_warning(\n      paste0(\n        \"Variable `\",\n        name,\n        \"` contains only two unique values. Consider converting it to a factor.\"\n      )\n    )\n  }\n\n  # rescale\n  out <- as.vector((x - min_value) / range_difference)\n\n  # if we don't have information on whether bounds are included or not,\n  # get this information here.\n  if (is.null(flag_bounds)) {\n    flag_bounds <- (any(out == 0) || any(out == 1))\n  }\n\n  if (!isTRUE(include_bounds) && flag_bounds) {\n    if (isFALSE(include_bounds)) {\n      out <- (out * (vector_length - 1) + 0.5) / vector_length\n    } else if (\n      is.numeric(include_bounds) && include_bounds > 0 && include_bounds < 1\n    ) {\n      out <- rescale(out, to = c(0 + include_bounds, 1 - include_bounds))\n    } else if (verbose) {\n      insight::format_warning(\n        \"`include_bounds` must be either logical or numeric (between 0 and 1).\",\n        \"Bounds (zeros and ones) are included in the returned value.\"\n      )\n    }\n  }\n\n  # Re-insert infinite values\n  out[infinite_idx] <- infinite_vals\n\n  attr(out, \"include_bounds\") <- include_bounds\n  attr(out, \"flag_bounds\") <- 
isTRUE(flag_bounds)\n  attr(out, \"min_value\") <- min_value\n  attr(out, \"vector_length\") <- vector_length\n  attr(out, \"range_difference\") <- range_difference\n  # don't add attribute when we call data frame methods\n  if (!isFALSE(dot_args$add_transform_class)) {\n    class(out) <- c(\"dw_transformer\", class(out))\n  }\n\n  out\n}\n\n\n#' @export\nnormalize.factor <- function(x, ...) {\n  x\n}\n\n\n#' @export\nnormalize.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  include_bounds = TRUE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = TRUE,\n    verbose = verbose\n  )\n\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_n\"\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x <- as.data.frame(x)\n\n  # create column(s) to store dw_transformer attributes\n  for (i in select) {\n    info$groups[[paste0(\"attr_\", i)]] <- rep(NA, length(grps))\n  }\n\n  for (rows in seq_along(grps)) {\n    tmp <- normalize(\n      x[grps[[rows]], , drop = FALSE],\n      select = select,\n      exclude = exclude,\n      include_bounds = include_bounds,\n      verbose = verbose,\n      append = FALSE, # need to set to FALSE here, else variable will be doubled\n      add_transform_class = FALSE,\n      ...\n    )\n\n    # store dw_transformer_attributes\n    for (i in select) {\n      info$groups[rows, paste0(\"attr_\", i)][[1]] <- 
list(unlist(attributes(tmp[[\n        i\n      ]])))\n    }\n\n    x[grps[[rows]], ] <- tmp\n  }\n\n  # last column of \"groups\" attributes must be called \".rows\"\n  info$groups <- data_relocate(info$groups, \".rows\", after = -1)\n\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- utils::modifyList(info, attributes(x))\n  class(x) <- c(\"grouped_df\", class(x))\n  x\n}\n\n\n#' @rdname normalize\n#' @export\nnormalize.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  include_bounds = TRUE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_n\"\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x[select] <- lapply(\n    x[select],\n    normalize,\n    include_bounds = include_bounds,\n    verbose = verbose,\n    add_transform_class = FALSE\n  )\n\n  x\n}\n\n\n#' @export\nnormalize.matrix <- function(x, ...) {\n  matrix(normalize(as.numeric(x), ...), nrow = nrow(x))\n}\n"
  },
  {
    "path": "R/ranktransform.R",
    "content": "#' (Signed) rank transformation\n#'\n#' Transform numeric values with the integers of their rank (i.e., 1st smallest,\n#' 2nd smallest, 3rd smallest, etc.). Setting the `sign` argument to `TRUE` will\n#' give you signed ranks, where the ranking is done according to absolute size\n#' but where the sign is preserved (i.e., 2, 1, -3, 4).\n#'\n#' @param x Object.\n#' @param sign Logical, if `TRUE`, return signed ranks.\n#' @param method Treatment of ties. Can be one of `\"average\"` (default),\n#'   `\"first\"`, `\"last\"`, `\"random\"`, `\"max\"` or `\"min\"`. See [rank()] for\n#'   details.\n#' @param zeros How to handle zeros. If `\"na\"` (default), they are marked as\n#' `NA`. If `\"signrank\"`, they are kept during the ranking and marked as zeros.\n#' This is only used when `sign = TRUE`.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams extract_column_names\n#' @inheritParams standardize.data.frame\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @examples\n#' ranktransform(c(0, 1, 5, -5, -2))\n#'\n#' # By default, zeros are converted to NA\n#' suppressWarnings(\n#'   ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)\n#' )\n#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = \"signrank\")\n#'\n#' head(ranktransform(trees))\n#' @return A rank-transformed object.\n#'\n#' @family transform utilities\n#'\n#' @export\nranktransform <- function(x, ...) 
{\n  UseMethod(\"ranktransform\")\n}\n\n\n#' @rdname ranktransform\n#' @export\nranktransform.numeric <- function(\n  x,\n  sign = FALSE,\n  method = \"average\",\n  zeros = \"na\",\n  verbose = TRUE,\n  ...\n) {\n  # no change if all values are `NA`s\n  if (all(is.na(x))) {\n    return(x)\n  }\n\n  zeros <- match.arg(zeros, c(\"na\", \"signrank\"))\n  method <- match.arg(\n    method,\n    c(\"average\", \"first\", \"last\", \"random\", \"max\", \"min\")\n  )\n\n  # Warning if only one value and return early\n  if (insight::has_single_value(x)) {\n    if (is.null(names(x))) {\n      name <- deparse(substitute(x))\n    } else {\n      name <- names(x)\n    }\n\n    if (verbose) {\n      insight::format_warning(\n        paste0(\n          \"Variable `\",\n          name,\n          \"` contains only one unique value and will not be normalized.\"\n        )\n      )\n    }\n\n    return(x)\n  }\n\n  # Warning if only two values present but don't return early\n  if (length(unique(x)) == 2L) {\n    if (is.null(names(x))) {\n      name <- deparse(substitute(x))\n    } else {\n      name <- names(x)\n    }\n\n    if (verbose) {\n      # nolint\n      insight::format_warning(\n        paste0(\n          \"Variable `\",\n          name,\n          \"` contains only two different values. Consider converting it to a factor.\"\n        )\n      )\n    }\n  }\n\n  if (sign) {\n    if (zeros == \"na\") {\n      out <- rep(NA, length(x))\n      ZEROES <- x == 0\n      if (any(ZEROES) && verbose) {\n        insight::format_warning(\n          \"Zeros detected. 
These cannot be sign-rank transformed.\"\n        ) # nolint\n      }\n      out[!ZEROES] <- sign(x[!ZEROES]) *\n        rank(abs(x[!ZEROES]), ties.method = method, na.last = \"keep\")\n    } else if (zeros == \"signrank\") {\n      out <- sign(x) * rank(abs(x), ties.method = method, na.last = \"keep\")\n    }\n  } else {\n    out <- rank(x, ties.method = method, na.last = \"keep\")\n  }\n\n  out\n}\n\n\n#' @export\nranktransform.factor <- function(x, ...) {\n  x\n}\n\n\n#' @export\nranktransform.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  sign = FALSE,\n  method = \"average\",\n  ignore_case = FALSE,\n  regex = FALSE,\n  zeros = \"na\",\n  verbose = TRUE,\n  ...\n) {\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  x <- as.data.frame(x)\n  for (rows in grps) {\n    x[rows, ] <- ranktransform(\n      x[rows, , drop = FALSE],\n      select = select,\n      exclude = exclude,\n      sign = sign,\n      method = method,\n      ...\n    )\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- info\n  x\n}\n\n\n#' @rdname ranktransform\n#' @export\nranktransform.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  sign = FALSE,\n  method = \"average\",\n  ignore_case = FALSE,\n  regex = FALSE,\n  zeros = \"na\",\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  x[select] <- lapply(x[select], ranktransform, sign = sign, method = method)\n  x\n}\n"
  },
  {
    "path": "R/recode_into.R",
    "content": "#' @title Recode values from one or more variables into a new variable\n#' @name recode_into\n#'\n#' @description\n#' This functions recodes values from one or more variables into a new variable.\n#' It is a convenient function to avoid nested [`ifelse()`] statements, which\n#' is similar to `dplyr::case_when()`.\n#'\n#' @param ... A sequence of two-sided formulas, where the left hand side (LHS)\n#' is a logical matching condition that determines which values match this case.\n#' The LHS of this formula is also called \"recode pattern\" (e.g., in messages).\n#' The right hand side (RHS) indicates the replacement value.\n#' @param data Optional, name of a data frame. This can be used to avoid writing\n#' the data name multiple times in `...`. See 'Examples'.\n#' @param default Indicates the default value that is chosen when no match in\n#' the formulas in `...` is found. If not provided, `NA` is used as default\n#' value.\n#' @param overwrite Logical, if `TRUE` (default) and more than one recode pattern\n#' apply to the same case, already recoded values will be overwritten by subsequent\n#' recode patterns. If `FALSE`, former recoded cases will not be altered by later\n#' recode patterns that would apply to those cases again. A warning message is\n#' printed to alert such situations and to avoid unintentional recodings.\n#' @param preserve_na Logical, if `TRUE` and `default` is not `NA`, missing\n#' values in the original variable will be set back to `NA` in the recoded\n#' variable (unless overwritten by other recode patterns). If `FALSE`, missing\n#' values in the original variable will be recoded to `default`. Setting\n#' `preserve_na = TRUE` prevents unintentional overwriting of missing values\n#' with `default`, which means that you won't find valid values where the\n#' original data only had missing values. 
See 'Examples'.\n#' @param verbose Toggle warnings.\n#'\n#' @return A vector with recoded values.\n#'\n#' @examples\n#' x <- 1:30\n#' recode_into(\n#'   x > 15 ~ \"a\",\n#'   x > 10 & x <= 15 ~ \"b\",\n#'   default = \"c\"\n#' )\n#'\n#' x <- 1:10\n#' # default behaviour: second recode pattern \"x > 5\" overwrites\n#' # some of the formerly recoded cases from pattern \"x >= 3 & x <= 7\"\n#' recode_into(\n#'   x >= 3 & x <= 7 ~ 1,\n#'   x > 5 ~ 2,\n#'   default = 0,\n#'   verbose = FALSE\n#' )\n#'\n#' # setting \"overwrite = FALSE\" will not alter formerly recoded cases\n#' recode_into(\n#'   x >= 3 & x <= 7 ~ 1,\n#'   x > 5 ~ 2,\n#'   default = 0,\n#'   overwrite = FALSE,\n#'   verbose = FALSE\n#' )\n#'\n#' set.seed(123)\n#' d <- data.frame(\n#'   x = sample(1:5, 30, TRUE),\n#'   y = sample(letters[1:5], 30, TRUE),\n#'   stringsAsFactors = FALSE\n#' )\n#'\n#' # from different variables into new vector\n#' recode_into(\n#'   d$x %in% 1:3 & d$y %in% c(\"a\", \"b\") ~ 1,\n#'   d$x > 3 ~ 2,\n#'   default = 0\n#' )\n#'\n#' # no need to write name of data frame each time\n#' recode_into(\n#'   x %in% 1:3 & y %in% c(\"a\", \"b\") ~ 1,\n#'   x > 3 ~ 2,\n#'   data = d,\n#'   default = 0\n#' )\n#'\n#' # handling of missing values\n#' d <- data.frame(\n#'   x = c(1, NA, 2, NA, 3, 4),\n#'   y = c(1, 11, 3, NA, 5, 6)\n#' )\n#' # first NA in x is overwritten by valid value from y\n#' # we have no known value for second NA in x and y,\n#' # thus we get one NA in the result\n#' recode_into(\n#'   x <= 3 ~ 1,\n#'   y > 5 ~ 2,\n#'   data = d,\n#'   default = 0,\n#'   preserve_na = TRUE\n#' )\n#' # first NA in x is overwritten by valid value from y\n#' # default value is used for second NA\n#' recode_into(\n#'   x <= 3 ~ 1,\n#'   y > 5 ~ 2,\n#'   data = d,\n#'   default = 0,\n#'   preserve_na = FALSE\n#' )\n#' @export\nrecode_into <- function(\n  ...,\n  data = NULL,\n  default = NA,\n  overwrite = TRUE,\n  preserve_na = FALSE,\n  verbose = TRUE\n) {\n  dots <- list(...)\n\n  # get 
length of vector, so we know the length of the output vector\n  len <- if (is.null(data)) {\n    length(.dynEval(dots[[1]][[2]], ifnotfound = NULL))\n  } else {\n    length(with(data, eval(dots[[1]][[2]])))\n  }\n\n  # how many expressions (recode-formulas) do we have?\n  n_params <- length(dots)\n\n  # last expression should always be the default value\n  if (is.null(default)) {\n    default <- NA\n    if (verbose) {\n      insight::format_warning(\n        \"Default value can't be `NULL`, setting to `NA` now.\"\n      )\n    }\n  }\n\n  # create default output vector\n  out <- rep(default, times = len)\n\n  all_recodes <- NULL\n  all_same_length <- NULL\n  new_values <- NULL\n  # check recode values\n  for (i in seq_len(n_params)) {\n    # get type of all recode values\n    if (is.null(data)) {\n      value_type <- .dynEval(dots[[i]][[3]], ifnotfound = NULL)\n      value_length <- .dynEval(dots[[i]][[2]], ifnotfound = NULL)\n    } else {\n      value_type <- with(data, eval(dots[[i]][[3]]))\n      value_length <- with(data, eval(dots[[i]][[2]]))\n    }\n    # if we have \"NA\", we don't want to check the type. Else, you cannot use\n    # \"NA\" for numeric recodes, but rather need to use \"NA_real_\", which is not\n    # user-friendly\n    if (is.na(value_type)) {\n      type <- NULL\n    } else {\n      type <- typeof(value_type)\n    }\n    len_matches <- length(value_length)\n    # save type and length of recode values\n    all_recodes <- c(all_recodes, type)\n    all_same_length <- c(all_same_length, len_matches)\n    new_values <- c(new_values, value_type)\n  }\n  # if we have mixed types, warn user\n  if (!is.null(all_recodes) && !all(all_recodes == all_recodes[1])) {\n    wrong_type <- which(all_recodes != all_recodes[1])\n    insight::format_error(\n      paste(\n        \"Recoding not carried out. Not all recode values are of the same type.\",\n        sprintf(\n          \"For instance, the new value of the first pattern, `%s`, is of type `%s`. 
The new value of the %s recode pattern, `%s`, is of type `%s`.\", # nolint\n          insight::color_text(new_values[1], \"cyan\"),\n          insight::color_text(all_recodes[1], \"cyan\"),\n          .number_to_text(wrong_type[1]),\n          insight::color_text(new_values[wrong_type[1]], \"cyan\"),\n          insight::color_text(all_recodes[wrong_type[1]], \"cyan\")\n        )\n      )\n    )\n  }\n  # all inputs of correct length?\n  if (\n    !is.null(all_same_length) && !all(all_same_length == all_same_length[1])\n  ) {\n    wrong_length <- which(all_same_length != all_same_length[1])\n    insight::format_error(\n      \"The matching conditions return vectors of different length.\",\n      paste(\n        \"Please check if all variables in your recode patterns are of the same length.\",\n        sprintf(\n          \"For instance, the first and the %s recode pattern return vectors of different length.\",\n          .number_to_text(wrong_length[1])\n        )\n      )\n    )\n  }\n\n  # indicator to show message when replacing NA by default\n  # needed to show message only once\n  overwrite_NA_msg <- TRUE\n\n  # iterate all expressions\n  for (i in seq_len(n_params)) {\n    # grep index of observations with replacements and replacement value\n    if (is.null(data)) {\n      index <- .dynEval(dots[[i]][[2]], ifnotfound = NULL)\n      value <- .dynEval(dots[[i]][[3]], ifnotfound = NULL)\n    } else {\n      index <- with(data, eval(dots[[i]][[2]]))\n      value <- with(data, eval(dots[[i]][[3]]))\n    }\n    # remember missing values, so we can add back later\n    missing_index <- is.na(index)\n    # make sure index has no missing values. when we have missing values in\n    # original expression, these are considered as \"no match\" and set to FALSE\n    # we handle NA value later and thus want to remove them from \"index\" now\n    index[is.na(index)] <- FALSE\n    # overwriting values? 
do more recode-patterns match the same case?\n    if (is.na(default)) {\n      already_exists <- !is.na(out[index])\n    } else {\n      already_exists <- out[index] != default\n    }\n    # save indices of overwritten cases\n    overwritten_cases <- which(index)[already_exists]\n    # tell user...\n    if (any(already_exists, na.rm = TRUE) && verbose) {\n      if (overwrite) {\n        msg <- paste(\n          \"Several recode patterns apply to the same cases.\",\n          \"Some of the already recoded cases will be overwritten with new values again\",\n          sprintf(\n            \"(e.g. pattern %i overwrites the former recode of case %i).\",\n            i,\n            overwritten_cases[1]\n          )\n        )\n      } else {\n        msg <- paste(\n          \"Several recode patterns apply to the same cases.\",\n          \"Some of the already recoded cases will not be altered by later recode patterns.\",\n          sprintf(\n            \"(e.g. pattern %i also matches the former recode of case %i).\",\n            i,\n            overwritten_cases[1]\n          )\n        )\n      }\n      insight::format_warning(msg, \"Please check if this is intentional!\")\n    }\n    # if user doesn't want to overwrite, remove already recoded indices\n    if (!overwrite) {\n      index[overwritten_cases] <- FALSE\n    }\n    # write new values into output vector\n    out[index] <- value\n    # set back missing values\n    if (any(missing_index) && !is.na(default)) {\n      if (preserve_na) {\n        # but only where we still have default values\n        # we don't want to overwrite already recoded values with NA\n        out[missing_index & out == default] <- NA\n      } else if (overwrite_NA_msg && verbose) {\n        # don't show msg again\n        overwrite_NA_msg <- FALSE\n        insight::format_alert(\n          \"Missing values in original variable are overwritten by default value. 
If you want to preserve missing values, set `preserve_na = TRUE`.\" # nolint\n        )\n      }\n    }\n  }\n\n  out\n}\n\n.number_to_text <- function(x) {\n  if (is.null(x) || is.na(x)) {\n    return(\"\")\n  }\n  if (x == 1) {\n    \"first\"\n  } else if (x == 2) {\n    \"second\"\n  } else if (x == 3) {\n    \"third\"\n  } else if (x == 4) {\n    \"fourth\"\n  } else if (x == 5) {\n    \"fifth\"\n  } else if (x == 21) {\n    \"twenty-first\"\n  } else if (x == 22) {\n    \"twenty-second\"\n  } else if (x == 23) {\n    \"twenty-third\"\n  } else {\n    paste0(x, \"th\")\n  }\n}\n"
  },
  {
    "path": "R/recode_values.R",
    "content": "#' @title Recode old values of variables into new values\n#' @name recode_values\n#'\n#' @description\n#' This functions recodes old values into new values and can be used to to\n#' recode numeric or character vectors, or factors.\n#'\n#' @param x A data frame, numeric or character vector, or factor.\n#' @param recode A list of named vectors, which indicate the recode pairs.\n#'   The _names_ of the list-elements (i.e. the left-hand side) represent the\n#'   _new_ values, while the values of the list-elements indicate the original\n#'   (old) values that should be replaced. When recoding numeric vectors,\n#'   element names have to be surrounded in backticks. For example,\n#'   ``recode=list(`0`=1)`` would recode all `1` into `0` in a numeric\n#'   vector. See also 'Examples' and 'Details'.\n#' @param default Defines the default value for all values that have no match in\n#'   the recode-pairs. If `NULL`, original values will be preserved when there\n#'   is no match. Note that, if `preserve_na=FALSE`, missing values (`NA`) are\n#'   also captured by the `default` argument, and thus will also be recoded into\n#'   the specified value. See 'Examples' and 'Details'.\n#' @param preserve_na Logical, if `TRUE`, `NA` (missing values) are preserved.\n#'   This overrides any other arguments, including `default`. Hence, if\n#'   `preserve_na=TRUE`, `default` will no longer convert `NA` into the specified\n#'   default value.\n#' @param ... not used.\n#' @inheritParams extract_column_names\n#' @inheritParams categorize\n#'\n#' @return `x`, where old values are replaced by new values.\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @inherit data_rename seealso\n#'\n#' @note You can use `options(data_recode_pattern = \"old=new\")` to switch the\n#' behaviour of the `recode`-argument, i.e. recode-pairs are now following the\n#' pattern `old values = new values`, e.g. 
if `getOption(\"data_recode_pattern\")`\n#' is set to `\"old=new\"`, then ``recode(`1`=0)`` would recode all 1 into 0.\n#' The default for ``recode(`1`=0)`` is to recode all 0 into 1.\n#'\n#' @details\n#' This section describes the pattern of the `recode` arguments, which also\n#' provides some shortcuts, in particular when recoding numeric values.\n#'\n#' - Single values\n#'\n#'   Single values either need to be wrapped in backticks (in case of numeric\n#'   values) or \"as is\" (for character or factor levels). Example:\n#'   ``recode=list(`0`=1,`1`=2)`` would recode 1 into 0, and 2 into 1.\n#'   For factors or character vectors, an example is:\n#'   `recode=list(x=\"a\",y=\"b\")` (recode \"a\" into \"x\" and \"b\" into \"y\").\n#'\n#' - Multiple values\n#'\n#'   Multiple values that should be recoded into a new value can be separated\n#'   with comma. Example: ``recode=list(`1`=c(1,4),`2`=c(2,3))`` would recode the\n#'   values 1 and 4 into 1, and 2 and 3 into 2. It is also possible to define  the\n#'   old values as a character string, like:  ``recode=list(`1`=\"1,4\",`2`=\"2,3\")``\n#'   For factors or character vectors, an example is:\n#'   ``recode=list(x=c(\"a\",\"b\"),y=c(\"c\",\"d\"))``.\n#'\n#' - Value range\n#'\n#'   Numeric value ranges can be defined using the `:`. Example:\n#'   ``recode=list(`1`=1:3,`2`=4:6)`` would recode all values from 1 to 3 into\n#'   1, and 4 to 6 into 2.\n#'\n#' - `min` and `max`\n#'\n#'   placeholder to use the minimum or maximum value of the\n#'   (numeric) variable. Useful, e.g., when recoding ranges of values.\n#'   Example: ``recode=list(`1`=\"min:10\",`2`=\"11:max\")``.\n#'\n#' - `default` values\n#'\n#'   The `default` argument defines the default value for all values that have\n#'   no match in the recode-pairs. 
For example,\n#'   ``recode=list(`1`=c(1,2),`2`=c(3,4)), default=9`` would\n#'   recode values 1 and 2 into 1, 3 and 4 into 2, and all other values into 9.\n#'   If `preserve_na` is set to `FALSE`, `NA` (missing values) will also be\n#'   recoded into the specified default value.\n#'\n#' - Reversing and rescaling\n#'\n#'   See [reverse()] and [rescale()].\n#'\n#' @examples\n#' # numeric ----------\n#' set.seed(123)\n#' x <- sample(c(1:4, NA), 15, TRUE)\n#' table(x, useNA = \"always\")\n#'\n#' out <- recode_values(x, list(`0` = 1, `1` = 2:3, `2` = 4))\n#' out\n#' table(out, useNA = \"always\")\n#'\n#' # to recode NA values, set preserve_na to FALSE\n#' out <- recode_values(\n#'   x,\n#'   list(`0` = 1, `1` = 2:3, `2` = 4, `9` = NA),\n#'   preserve_na = FALSE\n#' )\n#' out\n#' table(out, useNA = \"always\")\n#'\n#' # preserve na ----------\n#' out <- recode_values(x, list(`0` = 1, `1` = 2:3), default = 77)\n#' out\n#' table(out, useNA = \"always\")\n#'\n#' # recode na into default ----------\n#' out <- recode_values(\n#'   x,\n#'   list(`0` = 1, `1` = 2:3),\n#'   default = 77,\n#'   preserve_na = FALSE\n#' )\n#' out\n#' table(out, useNA = \"always\")\n#'\n#'\n#' # factors (character vectors are similar) ----------\n#' set.seed(123)\n#' x <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\n#' table(x)\n#'\n#' out <- recode_values(x, list(x = \"a\", y = c(\"b\", \"c\")))\n#' out\n#' table(out)\n#'\n#' out <- recode_values(x, list(x = \"a\", y = \"b\", z = \"c\"))\n#' out\n#' table(out)\n#'\n#' out <- recode_values(x, list(y = \"b,c\"), default = 77)\n#' # same as\n#' # recode_values(x, list(y = c(\"b\", \"c\")), default = 77)\n#' out\n#' table(out)\n#'\n#'\n#' # data frames ----------\n#' set.seed(123)\n#' d <- data.frame(\n#'   x = sample(c(1:4, NA), 12, TRUE),\n#'   y = as.factor(sample(c(\"a\", \"b\", \"c\"), 12, TRUE)),\n#'   stringsAsFactors = FALSE\n#' )\n#'\n#' recode_values(\n#'   d,\n#'   recode = list(`0` = 1, `1` = 2:3, `2` = 4, x = \"a\", y = c(\"b\", 
\"c\")),\n#'   append = TRUE\n#' )\n#'\n#'\n#' # switch recode pattern to \"old=new\" ----------\n#' options(data_recode_pattern = \"old=new\")\n#'\n#' # numeric\n#' set.seed(123)\n#' x <- sample(c(1:4, NA), 15, TRUE)\n#' table(x, useNA = \"always\")\n#'\n#' out <- recode_values(x, list(`1` = 0, `2:3` = 1, `4` = 2))\n#' table(out, useNA = \"always\")\n#'\n#' # factors (character vectors are similar)\n#' set.seed(123)\n#' x <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\n#' table(x)\n#'\n#' out <- recode_values(x, list(a = \"x\", `b, c` = \"y\"))\n#' table(out)\n#'\n#' # reset options\n#' options(data_recode_pattern = NULL)\n#' @export\nrecode_values <- function(x, ...) {\n  UseMethod(\"recode_values\")\n}\n\n\n#' @export\nrecode_values.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      paste0(\n        \"Variables of class `\",\n        class(x)[1],\n        \"` can't be recoded and remain unchanged.\"\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname recode_values\n#' @export\nrecode_values.numeric <- function(\n  x,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # save\n  original_x <- x\n\n  # check arguments\n  if (!.recode_args_ok(x, recode, verbose)) {\n    return(x)\n  }\n\n  # recode-pattern option\n  pattern <- getOption(\"data_recode_pattern\")\n\n  # make sure NAs are preserved after recoding\n  missing_values <- NULL\n  if (preserve_na) {\n    missing_values <- is.na(x)\n  }\n\n  # check for \"default\" token\n  if (!is.null(default)) {\n    # set the default value for all values that have no match\n    # (i.e. 
that should not be recoded)\n    x <- rep(as.numeric(default), length = length(x))\n  }\n\n  for (i in names(recode)) {\n    # based on option-settings, the recode-argument can either follow the\n    # pattern \"new=old\", or \"old=new\"\n\n    if (identical(pattern, \"old=new\")) {\n      # pattern: old = new, name of list element is old value\n      old_values <- i\n      new_values <- recode[[i]]\n    } else {\n      # pattern: new = old, name of list element is new value\n      old_values <- recode[[i]]\n      new_values <- i\n    }\n\n    if (is.character(old_values)) {\n      # replace placeholder\n      old_values <- gsub(\"min\", min(x, na.rm = TRUE), old_values, fixed = TRUE)\n      old_values <- gsub(\"max\", max(x, na.rm = TRUE), old_values, fixed = TRUE)\n\n      # mimic vector\n      if (length(old_values) == 1 && !grepl(\"c(\", old_values, fixed = TRUE)) {\n        old_values <- paste0(\"c(\", old_values, \")\")\n      }\n\n      # parse old values, which are strings (names of element), but which should\n      # contain values, like \"1:10\" or \"1, 2, 3, 4\". 
These should now be in the\n      # format \"c(1, 2, 3, 4)\" or \"c(1:10)\", and it should be possible to parse\n      # and evaluate these strings into a numeric vector\n      old_values <- tryCatch(\n        eval(parse(text = old_values)),\n        error = function(e) NULL\n      )\n    }\n\n    if (!is.null(old_values) && (is.numeric(old_values) || is.na(old_values))) {\n      x[which(original_x %in% old_values)] <- as.numeric(new_values)\n    }\n  }\n\n  # set back variable labels, remove value labels\n  # (these are most likely not matching anymore)\n  attr(x, \"label\") <- attr(original_x, \"label\", exact = TRUE)\n  attr(x, \"labels\") <- NULL\n\n  # set back missing values\n  if (!is.null(missing_values)) {\n    x[missing_values] <- NA\n  }\n\n  x\n}\n\n\n#' @export\nrecode_values.factor <- function(\n  x,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # save\n  original_x <- x\n\n  # check arguments\n  if (!.recode_args_ok(x, recode, verbose)) {\n    return(x)\n  }\n\n  # recode-pattern option\n  pattern <- getOption(\"data_recode_pattern\")\n\n  # make sure NAs are preserved after recoding\n  missing_values <- NULL\n  if (preserve_na) {\n    missing_values <- is.na(x)\n  }\n\n  # as character, so recoding works\n  x <- as.character(x)\n\n  # check for \"default\" token\n  if (!is.null(default)) {\n    # set the default value for all values that have no match\n    # (i.e. 
that should not be recoded)\n    x <- rep(as.character(default), length = length(x))\n  }\n\n  for (i in names(recode)) {\n    # based on option-settings, the recode-argument can either follow the\n    # pattern \"new=old\", or \"old=new\"\n\n    if (identical(pattern, \"old=new\")) {\n      # pattern: old = new\n      # name of list element is old value\n\n      old_values <- paste(\n        deparse(insight::trim_ws(unlist(\n          strsplit(i, \",\", fixed = TRUE),\n          use.names = FALSE\n        ))),\n        collapse = \",\"\n      )\n\n      # parse old values, which are strings (names of element), but which should\n      # contain values, like \"a\" or \"a, b, c\". These should now be in the\n      # format \"c(\"a\", \"b\", \"c\")\" and it should be possible to parse\n      # and evaluate these strings into a numeric vector\n      old_values <- tryCatch(\n        eval(parse(text = old_values)),\n        error = function(e) NULL\n      )\n\n      # recode\n      x[which(original_x %in% old_values)] <- recode[[i]]\n    } else {\n      # pattern: new = old\n      # name of list element is new value\n\n      old_values <- as.character(recode[[i]])\n      # check input style: \"a, b, c\"\n      if (length(old_values) == 1 && grepl(\",\", old_values, fixed = TRUE)) {\n        # split and make character vector\n        old_values <- insight::trim_ws(unlist(\n          strsplit(old_values, \",\", fixed = TRUE),\n          use.names = FALSE\n        ))\n      }\n      # recode\n      if (identical(i, \"NA\")) {\n        x[which(original_x %in% old_values)] <- NA_character_\n      } else {\n        x[which(original_x %in% old_values)] <- as.character(i)\n      }\n    }\n  }\n\n  # set back missing values\n  if (!is.null(missing_values)) {\n    x[missing_values] <- NA_character_\n  }\n\n  # make sure we have correct new levels\n  x <- droplevels(as.factor(x))\n\n  # set back variable labels, remove value labels\n  # (these are most likely not matching 
anymore)\n  attr(x, \"label\") <- attr(original_x, \"label\", exact = TRUE)\n  attr(x, \"labels\") <- NULL\n\n  x\n}\n\n\n#' @export\nrecode_values.character <- function(\n  x,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # save\n  original_x <- x\n\n  # check arguments\n  if (!.recode_args_ok(x, recode, verbose)) {\n    return(x)\n  }\n\n  # recode-pattern option\n  pattern <- getOption(\"data_recode_pattern\")\n\n  # make sure NAs are preserved after recoding\n  missing_values <- NULL\n  if (preserve_na) {\n    missing_values <- is.na(x)\n  }\n\n  # check for \"default\" token\n  if (!is.null(default)) {\n    # set the default value for all values that have no match\n    # (i.e. that should not be recoded)\n    x <- rep(as.character(default), length = length(x))\n  }\n\n  for (i in names(recode)) {\n    # based on option-settings, the recode-argument can either follow the\n    # pattern \"new=old\", or \"old=new\"\n\n    if (identical(pattern, \"old=new\")) {\n      # pattern: old = new\n      # name of list element is old value\n\n      # name of list element is old value\n      value_string <- paste(\n        deparse(insight::trim_ws(unlist(\n          strsplit(i, \",\", fixed = TRUE),\n          use.names = FALSE\n        ))),\n        collapse = \",\"\n      )\n\n      # parse old values, which are strings (names of element), but which should\n      # contain values, like \"a\" or \"a, b, c\". 
These should now be in the\n      # format \"c(\"a\", \"b\", \"c\")\" and it should be possible to parse\n      # and evaluate these strings into a numeric vector\n      old_values <- tryCatch(\n        eval(parse(text = value_string)),\n        error = function(e) NULL\n      )\n\n      # recode\n      x[which(original_x %in% old_values)] <- recode[[i]]\n    } else {\n      # pattern: new = old\n      # name of list element is new value\n\n      old_values <- as.character(recode[[i]])\n      # check input style: \"a, b, c\"\n      if (length(old_values) == 1 && grepl(\",\", old_values, fixed = TRUE)) {\n        # split and make character vector\n        old_values <- insight::trim_ws(unlist(\n          strsplit(old_values, \",\", fixed = TRUE),\n          use.names = FALSE\n        ))\n      }\n      # recode\n      if (identical(i, \"NA\")) {\n        x[which(original_x %in% old_values)] <- NA_character_\n      } else {\n        x[which(original_x %in% old_values)] <- as.character(i)\n      }\n    }\n  }\n\n  # set back variable labels, remove value labels\n  # (these are most likely not matching anymore)\n  attr(x, \"label\") <- attr(original_x, \"label\", exact = TRUE)\n  attr(x, \"labels\") <- NULL\n\n  # set back missing values\n  if (!is.null(missing_values)) {\n    x[missing_values] <- NA_character_\n  }\n\n  x\n}\n\n\n#' @rdname recode_values\n#' @export\nrecode_values.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- 
.process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_r\",\n      preserve_value_labels = TRUE\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x[select] <- lapply(\n    x[select],\n    recode_values,\n    recode = recode,\n    default = default,\n    preserve_na = preserve_na,\n    verbose = verbose,\n    ...\n  )\n\n  x\n}\n\n\n# utils --------------------------\n\n.recode_args_ok <- function(x, recode, verbose) {\n  ok <- TRUE\n  # no missings\n  valid <- stats::na.omit(x)\n\n  # skip if all NA\n  if (!length(valid)) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"Variable contains only missing values. No recoding carried out.\"\n      )\n    }\n    ok <- FALSE\n  }\n\n  # warn if not a list\n  if (!is.list(recode) || is.null(names(recode))) {\n    if (isTRUE(verbose)) {\n      insight::format_warning(\n        \"`recode` needs to be a (named) list. No recoding carried out.\"\n      )\n    }\n    ok <- FALSE\n  }\n\n  ok\n}\n"
  },
  {
    "path": "R/remove_empty.R",
    "content": "#' @title Return or remove variables or observations that are completely missing\n#' @name remove_empty\n#' @rdname remove_empty\n#'\n#' @description\n#'\n#' These functions check which rows or columns of a data frame completely\n#' contain missing values, i.e. which observations or variables completely have\n#' missing values, and either (1) returns their indices; or (2) removes them\n#' from the data frame.\n#'\n#' @param x A data frame.\n#'\n#' @return\n#'\n#' - For `empty_columns()` and `empty_rows()`, a numeric (named) vector with row\n#' or column indices of those variables that completely have missing values.\n#'\n#' - For `remove_empty_columns()` and `remove_empty_rows()`, a data frame with\n#' \"empty\" columns or rows removed, respectively.\n#'\n#' - For `remove_empty()`, **both** empty rows and columns will be removed.\n#'\n#' @details For character vectors, empty string values (i.e. `\"\"`) are also\n#' considered as missing value. Thus, if a character vector only contains `NA`\n#' and `\"\"`, it is considered as empty variable and will be removed. 
Same\n#' applies to observations (rows) that only contain `NA` or `\"\"`.\n#'\n#' @examples\n#' tmp <- data.frame(\n#'   a = c(1, 2, 3, NA, 5),\n#'   b = c(1, NA, 3, NA, 5),\n#'   c = c(NA, NA, NA, NA, NA),\n#'   d = c(1, NA, 3, NA, 5)\n#' )\n#'\n#' tmp\n#'\n#' # indices of empty columns or rows\n#' empty_columns(tmp)\n#' empty_rows(tmp)\n#'\n#' # remove empty columns or rows\n#' remove_empty_columns(tmp)\n#' remove_empty_rows(tmp)\n#'\n#' # remove empty columns and rows\n#' remove_empty(tmp)\n#'\n#' # also remove \"empty\" character vectors\n#' tmp <- data.frame(\n#'   a = c(1, 2, 3, NA, 5),\n#'   b = c(1, NA, 3, NA, 5),\n#'   c = c(\"\", \"\", \"\", \"\", \"\"),\n#'   stringsAsFactors = FALSE\n#' )\n#' empty_columns(tmp)\n#'\n#' @export\nempty_columns <- function(x) {\n  if ((!is.matrix(x) && !is.data.frame(x)) || ncol(x) < 2) {\n    vector(\"numeric\")\n  } else {\n    all_na <- colSums(is.na(x)) == nrow(x)\n    all_empty <- vapply(\n      x,\n      function(i) {\n        (is.character(i) || is.factor(i)) &&\n          !any(nzchar(as.character(i[!is.na(i)])))\n      },\n      FUN.VALUE = logical(1L)\n    )\n\n    which(all_na | all_empty)\n  }\n}\n\n\n#' @rdname remove_empty\n#' @export\nempty_rows <- function(x) {\n  if ((!is.matrix(x) && !is.data.frame(x)) || nrow(x) < 2) {\n    vector(\"numeric\")\n  } else {\n    which(rowSums((is.na(x) | x == \"\")) == ncol(x)) # nolint\n  }\n}\n\n\n#' @rdname remove_empty\n#' @export\nremove_empty_columns <- function(x) {\n  # check if we have any empty columns at all\n  ec <- empty_columns(x)\n\n  # if yes, removing works, else an empty df would be returned\n  if (length(ec)) {\n    x <- x[-ec]\n  }\n\n  x\n}\n\n\n#' @rdname remove_empty\n#' @export\nremove_empty_rows <- function(x) {\n  # check if we have any empty rows at all\n  er <- empty_rows(x)\n\n  # if yes, removing works, else an empty df would be returned\n  if (length(er)) {\n    attr_data <- attributes(x)\n    x <- x[-er, ]\n    x <- .replace_attrs(x, 
attr_data)\n  }\n\n  x\n}\n\n#' @rdname remove_empty\n#' @export\nremove_empty <- function(x) {\n  x <- remove_empty_rows(x)\n  x <- remove_empty_columns(x)\n  x\n}\n"
  },
  {
    "path": "R/replace_nan_inf.R",
    "content": "#' @title Convert infinite or `NaN` values into `NA`\n#' @name replace_nan_inf\n#'\n#' @description\n#' Replaces all infinite (`Inf` and `-Inf`) or `NaN` values with `NA`.\n#'\n#' @param x A vector or a dataframe\n#' @param ... Currently not used.\n#'\n#' @return\n#' Data with `Inf`, `-Inf`, and `NaN` converted to `NA`.\n#'\n#' @examples\n#' # a vector\n#' x <- c(1, 2, NA, 3, NaN, 4, NA, 5, Inf, -Inf, 6, 7)\n#' replace_nan_inf(x)\n#'\n#' # a data frame\n#' df <- data.frame(\n#'   x = c(1, NA, 5, Inf, 2, NA),\n#'   y = c(3, NaN, 4, -Inf, 6, 7),\n#'   stringsAsFactors = FALSE\n#' )\n#' replace_nan_inf(df)\n#' @export\n\nreplace_nan_inf <- function(x, ...) {\n  UseMethod(\"replace_nan_inf\")\n}\n\n#' @export\nreplace_nan_inf.default <- function(x, ...) {\n  x[is.nan(x) | is.infinite(x)] <- NA\n  x\n}\n\n#' @inheritParams extract_column_names\n#' @export\nreplace_nan_inf.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # Select and deselect\n  cols <- .select_nse(\n    select,\n    x,\n    exclude = exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  for (i in cols) {\n    x[[i]] <- replace_nan_inf(x[[i]])\n  }\n\n  x\n}\n"
  },
  {
    "path": "R/rescale_weights.R",
    "content": "#' @title Rescale design weights for multilevel analysis\n#' @name rescale_weights\n#'\n#' @description Most functions to fit multilevel and mixed effects models only\n#' allow the user to specify frequency weights, but not design (i.e., sampling\n#' or probability) weights, which should be used when analyzing complex samples\n#' (e.g., probability samples). `rescale_weights()` implements two algorithms,\n#' one proposed by \\cite{Asparouhov (2006)} and \\cite{Carle (2009)}, to rescale\n#' design weights in survey data to account for the grouping structure of\n#' multilevel models, and one based on the design effect proposed by\n#' \\cite{Kish (1965)}, to rescale weights by the design effect to account for\n#' additional sampling error introduced by weighting.\n#' @param data A data frame.\n#' @param by Variable names (as character vector, or as formula), indicating\n#' the grouping structure (strata) of the survey data (level-2-cluster\n#' variable). It is also possible to create weights for multiple group\n#' variables; in such cases, each created weighting variable will be suffixed\n#' by the name of the group variable. This argument is required for\n#' `method = \"carle\"`, but optional for `method = \"kish\"`.\n#' @param probability_weights Variable indicating the probability (design or\n#' sampling) weights of the survey data (level-1-weight), provided as character\n#' string or formula.\n#' @param nest Logical, if `TRUE` and `by` indicates at least two group\n#' variables, then groups are \"nested\", i.e. groups are now a combination from\n#' each group level of the variables in `by`. This argument is not used when\n#' `method = \"kish\"`.\n#' @param method String, indicating which rescale-method is used for rescaling\n#' weights. Can be either `\"carle\"` (default) or `\"kish\"`. See 'Details'. If\n#' `method = \"carle\"`, the `by` argument is required.\n#'\n#' @return\n#' `data`, including the new weighting variable(s). 
For `method = \"carle\"`, new\n#' columns `rescaled_weights_a` and `rescaled_weights_b` are returned, and for\n#' `method = \"kish\"`, the returned data contains a column `rescaled_weights`.\n#' These represent the rescaled design weights to use in multilevel models (use\n#' these variables for the `weights` argument).\n#'\n#' @details\n#' - `method = \"carle\"`\n#'\n#'   Rescaling is based on two methods: For `rescaled_weights_a`, the sample\n#'   weights `probability_weights` are adjusted by a factor that represents the\n#'   proportion of group size divided by the sum of sampling weights within each\n#'   group. The adjustment factor for `rescaled_weights_b` is the sum of sample\n#'   weights within each group divided by the sum of squared sample weights\n#'   within each group (see Carle (2009), Appendix B). In other words,\n#'   `rescaled_weights_a` \"scales the weights so that the new weights sum to the\n#'   cluster sample size\" while `rescaled_weights_b` \"scales the weights so that\n#'   the new weights sum to the effective cluster size\".\n#'\n#'   Regarding the choice between scaling methods A and B, Carle suggests that\n#'   \"analysts who wish to discuss point estimates should report results based\n#'   on weighting method A. For analysts more interested in residual\n#'   between-group variance, method B may generally provide the least biased\n#'   estimates\". In general, it is recommended to fit a non-weighted model and\n#'   weighted models with both scaling methods and when comparing the models,\n#'   see whether the \"inferential decisions converge\", to gain confidence in the\n#'   results.\n#'\n#'   Though the bias of scaled weights decreases with increasing group size,\n#'   method A is preferred when insufficient or low group size is a concern.\n#'\n#'   The group ID and probably PSU may be used as random effects (e.g. 
nested\n#'   design, or group and PSU as varying intercepts), depending on the survey\n#'   design that should be mimicked.\n#'\n#' - `method = \"kish\"`\n#'\n#'   Rescaling is based on scaling the sample weights so the mean value is 1,\n#'   which means the sum of all weights equals the sample size. Next, the design\n#'   effect (_Kish 1965_) is calculated, which is the mean of the squared\n#'   weights divided by the squared mean of the weights. The scaled sample\n#'   weights are then divided by the design effect. This method is most\n#'   appropriate when weights are based on additional variables beyond the\n#'   grouping variables in the model (e.g., other demographic characteristics),\n#'   but may also be useful in other contexts.\n#'\n#'   Some tests on real-world survey-data suggest that, in comparison to the\n#'   Carle-method, the Kish-method comes closer to estimates from a regular\n#'   survey-design using the **survey** package. Note that these tests are not\n#'   representative and it is recommended to check your results against a\n#'   standard survey-design.\n#'\n#' @references\n#'   - Asparouhov T. (2006). General Multi-Level Modeling with Sampling\n#'   Weights. Communications in Statistics - Theory and Methods 35: 439-460\n#'\n#'   - Carle A.C. (2009). Fitting multilevel models in complex survey data\n#'   with design weights: Recommendations. BMC Medical Research Methodology\n#'   9(49): 1-13\n#'\n#'   - Kish, L. (1965) Survey Sampling. 
London: Wiley.\n#'\n#' @examplesIf all(insight::check_if_installed(c(\"lme4\", \"parameters\"), quietly = TRUE))\n#' data(nhanes_sample)\n#' head(rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\"))\n#'\n#' # also works with multiple group-variables\n#' head(rescale_weights(nhanes_sample, \"WTINT2YR\", c(\"SDMVSTRA\", \"SDMVPSU\")))\n#'\n#' # or nested structures.\n#' x <- rescale_weights(\n#'   data = nhanes_sample,\n#'   probability_weights = \"WTINT2YR\",\n#'   by = c(\"SDMVSTRA\", \"SDMVPSU\"),\n#'   nest = TRUE\n#' )\n#' head(x)\n#'\n#' \\donttest{\n#' # compare different methods, using multilevel-Poisson regression\n#'\n#' d <- rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\")\n#' result1 <- lme4::glmer(\n#'   total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n#'   family = poisson(),\n#'   data = d,\n#'   weights = rescaled_weights_a\n#' )\n#' result2 <- lme4::glmer(\n#'   total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n#'   family = poisson(),\n#'   data = d,\n#'   weights = rescaled_weights_b\n#' )\n#'\n#' d <- rescale_weights(\n#'   nhanes_sample,\n#'   \"WTINT2YR\",\n#'   method = \"kish\"\n#' )\n#' result3 <- lme4::glmer(\n#'   total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n#'   family = poisson(),\n#'   data = d,\n#'   weights = rescaled_weights\n#' )\n#' d <- rescale_weights(\n#'   nhanes_sample,\n#'   \"WTINT2YR\",\n#'   \"SDMVSTRA\",\n#'   method = \"kish\"\n#' )\n#' result4 <- lme4::glmer(\n#'   total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n#'   family = poisson(),\n#'   data = d,\n#'   weights = rescaled_weights\n#' )\n#' parameters::compare_parameters(\n#'   list(result1, result2, result3, result4),\n#'   exponentiate = TRUE,\n#'   column_names = c(\"Carle (A)\", \"Carle (B)\", \"Kish\", \"Kish (grouped)\")\n#' )\n#' }\n#' @export\nrescale_weights <- function(\n  data,\n  probability_weights = NULL,\n  by = NULL,\n  nest = FALSE,\n  
method = \"carle\"\n) {\n  method <- insight::validate_argument(method, c(\"carle\", \"kish\"))\n\n  # convert formulas to strings\n  if (inherits(by, \"formula\")) {\n    by <- all.vars(by)\n  }\n\n  if (inherits(probability_weights, \"formula\")) {\n    probability_weights <- all.vars(probability_weights)\n  }\n\n  # check for existing variable names\n  if (\n    (method == \"carle\" &&\n      any(c(\"rescaled_weights_a\", \"rescaled_weights_b\") %in% colnames(data))) ||\n      (method == \"kish\" && \"rescaled_weights\" %in% colnames(data))\n  ) {\n    insight::format_warning(\n      \"The variable name for the rescaled weights already exists in the data. Returned columns will be renamed into unique names.\"\n    ) # nolint\n  }\n\n  # need probability_weights\n  if (is.null(probability_weights)) {\n    insight::format_error(\n      \"The argument `probability_weights` is missing, but required to rescale weights.\"\n    )\n  }\n\n  # check if weight has missings. we need to remove them first,\n  # and add back weights to correct cases later\n\n  weight_missings <- which(is.na(data[[probability_weights]]))\n  weight_non_na <- which(!is.na(data[[probability_weights]]))\n\n  if (length(weight_missings) > 0) {\n    data_tmp <- data[weight_non_na, ]\n  } else {\n    data_tmp <- data\n  }\n\n  fun_args <- list(\n    nest = nest,\n    probability_weights = probability_weights,\n    data_tmp = data_tmp,\n    data = data,\n    by = by,\n    weight_non_na = weight_non_na\n  )\n\n  switch(\n    method,\n    carle = do.call(.rescale_weights_carle, fun_args),\n    do.call(.rescale_weights_kish, fun_args)\n  )\n}\n\n\n# rescale weights, method Kish ----------------------------\n\n.rescale_weights_kish <- function(\n  nest,\n  probability_weights,\n  data_tmp,\n  data,\n  by,\n  weight_non_na\n) {\n  # sort id\n  data_tmp$.bamboozled <- seq_len(nrow(data_tmp))\n\n  # `nest` is currently ignored\n  if (isTRUE(nest)) {\n    insight::format_warning(\n      \"Argument `nest` is 
ignored for `method = \\\"kish\\\"`.\"\n    )\n  }\n\n  # check by argument\n  if (!is.null(by) && !all(by %in% colnames(data_tmp))) {\n    dont_exist <- setdiff(by, colnames(data_tmp))\n    insight::format_error(\n      paste0(\n        \"The following variable(s) specified in `by` don't exist in the dataset: \",\n        text_concatenate(dont_exist),\n        \".\"\n      ),\n      .misspelled_string(colnames(data_tmp), dont_exist, \"Possibly misspelled?\")\n    )\n  } else if (is.null(by)) {\n    # if `by` = NULL, we create a dummy group\n    by <- \"tmp_kish_by\"\n    data_tmp[[by]] <- 1\n  }\n\n  # split into groups, and calculate weights\n  out <- lapply(split(data_tmp, data_tmp[by]), function(group_data) {\n    p_weights <- group_data[[probability_weights]]\n    # design effect according to Kish\n    deff <- mean(p_weights^2) / (mean(p_weights)^2)\n    # rescale weights, so their mean is 1\n    z_weights <- p_weights * (1 / mean(p_weights))\n    # divide weights by design effect\n    group_data$rescaled_weights <- z_weights / deff\n    group_data\n  })\n\n  # bind data\n  result <- do.call(rbind, out)\n\n  # restore original order\n  result <- result[order(result$.bamboozled), ]\n\n  # add back rescaled weights to original data, but account for missing observations\n  data$rescaled_weights <- NA_real_\n  data$rescaled_weights[weight_non_na] <- result$rescaled_weights\n  # return result\n  data\n}\n\n\n# rescale weights, method Carle ----------------------------\n\n.rescale_weights_carle <- function(\n  nest,\n  probability_weights,\n  data_tmp,\n  data,\n  by,\n  weight_non_na\n) {\n  # sort id\n  data_tmp$.bamboozled <- seq_len(nrow(data_tmp))\n\n  if (is.null(by)) {\n    insight::format_error(\n      \"Argument `by` must be specified. 
Please provide one or more variable names in `by` that indicate the grouping structure (strata) of the survey data (level-2-cluster variable).\"\n    ) # nolint\n  }\n\n  if (!all(by %in% colnames(data_tmp))) {\n    dont_exist <- setdiff(by, colnames(data_tmp))\n    insight::format_error(\n      paste0(\n        \"The following variable(s) specified in `by` don't exist in the dataset: \",\n        text_concatenate(dont_exist),\n        \".\"\n      ),\n      .misspelled_string(colnames(data_tmp), dont_exist, \"Possibly misspelled?\")\n    )\n  }\n\n  if (nest && length(by) < 2) {\n    insight::format_warning(\n      sprintf(\n        \"Only one group variable selected in `by`, no nested structure possible. Rescaling weights for group '%s' now.\",\n        by\n      )\n    )\n    nest <- FALSE\n  }\n\n  if (nest) {\n    out <- .rescale_weights_nested(\n      data_tmp,\n      group = by,\n      probability_weights,\n      nrow(data),\n      weight_non_na\n    )\n  } else {\n    out <- lapply(by, function(i) {\n      x <- .rescale_weights(\n        data_tmp,\n        i,\n        probability_weights,\n        nrow(data),\n        weight_non_na\n      )\n      if (length(by) > 1) {\n        colnames(x) <- sprintf(c(\"pweight_a_%s\", \"pweight_b_%s\"), i)\n      }\n      x\n    })\n  }\n\n  make_unique_names <- any(vapply(\n    out,\n    function(i) any(colnames(i) %in% colnames(data)),\n    logical(1)\n  ))\n  # add weights to data frame\n  out <- do.call(cbind, list(data, out))\n  # check if we have to rename columns\n  if (make_unique_names) {\n    colnames(out) <- make.unique(colnames(out), sep = \"_\")\n  }\n\n  out\n}\n\n\n# rescale weights, for one or more group variables ----------------------------\n\n.rescale_weights <- function(x, group, probability_weights, n, weight_non_na) {\n  # compute sum of weights per group\n  design_weights <- .data_frame(\n    group = sort(unique(x[[group]])),\n    sum_weights_by_group = tapply(\n      x[[probability_weights]],\n     
 as.factor(x[[group]]),\n      sum\n    ),\n    sum_squared_weights_by_group = tapply(\n      x[[probability_weights]]^2,\n      as.factor(x[[group]]),\n      sum\n    ),\n    n_per_group = as.vector(table(x[[group]]))\n  )\n\n  colnames(design_weights)[1] <- group\n  x <- merge(x, design_weights, by = group, sort = FALSE)\n\n  # restore original order\n  x <- x[order(x$.bamboozled), ]\n  x$.bamboozled <- NULL\n\n  # multiply the original weight by the fraction of the\n  # sampling unit total population based on Carle 2009\n\n  w_a <- x[[probability_weights]] * x$n_per_group / x$sum_weights_by_group\n  w_b <- x[[probability_weights]] *\n    x$sum_weights_by_group /\n    x$sum_squared_weights_by_group\n\n  out <- data.frame(\n    rescaled_weights_a = rep(NA_real_, times = n),\n    rescaled_weights_b = rep(NA_real_, times = n)\n  )\n\n  out$rescaled_weights_a[weight_non_na] <- w_a\n  out$rescaled_weights_b[weight_non_na] <- w_b\n\n  out\n}\n\n\n# rescale weights, for nested groups ----------------------------\n\n.rescale_weights_nested <- function(\n  x,\n  group,\n  probability_weights,\n  n,\n  weight_non_na\n) {\n  groups <- expand.grid(lapply(group, function(i) sort(unique(x[[i]]))))\n  colnames(groups) <- group\n\n  # compute sum of weights per group\n  design_weights <- cbind(\n    groups,\n    .data_frame(\n      sum_weights_by_group = unlist(\n        as.list(tapply(\n          x[[probability_weights]],\n          lapply(group, function(i) {\n            as.factor(x[[i]])\n          }),\n          sum\n        )),\n        use.names = FALSE\n      ),\n      sum_squared_weights_by_group = unlist(\n        as.list(tapply(\n          x[[probability_weights]]^2,\n          lapply(group, function(i) {\n            as.factor(x[[i]])\n          }),\n          sum\n        )),\n        use.names = FALSE\n      ),\n      n_per_group = unlist(as.list(table(x[, group])), use.names = FALSE)\n    )\n  )\n\n  x <- merge(x, design_weights, by = group, sort = FALSE)\n\n  # 
restore original order\n  x <- x[order(x$.bamboozled), ]\n  x$.bamboozled <- NULL\n\n  # multiply the original weight by the fraction of the\n  # sampling unit total population based on Carle 2009\n\n  w_a <- x[[probability_weights]] * x$n_per_group / x$sum_weights_by_group\n  w_b <- x[[probability_weights]] *\n    x$sum_weights_by_group /\n    x$sum_squared_weights_by_group\n\n  out <- data.frame(\n    rescaled_weights_a = rep(NA_real_, times = n),\n    rescaled_weights_b = rep(NA_real_, times = n)\n  )\n\n  out$rescaled_weights_a[weight_non_na] <- w_a\n  out$rescaled_weights_b[weight_non_na] <- w_b\n\n  out\n}\n"
  },
  {
    "path": "R/reshape_ci.R",
    "content": "#' Reshape CI between wide/long formats\n#'\n#' Reshape CI between wide/long formats.\n#'\n#' @param x A data frame containing columns named `CI_low` and `CI_high` (or\n#'   similar, see `ci_type`).\n#' @param ci_type String indicating the \"type\" (i.e. prefix) of the interval\n#'   columns. Per *easystats* convention, confidence or credible intervals are\n#'   named `CI_low` and `CI_high`, and the related `ci_type` would be `\"CI\"`.\n#'   If column names for other intervals differ, `ci_type` can be used to\n#'   indicate the name, e.g. `ci_type = \"SI\"` can be used for support intervals,\n#'   where the column names in the data frame would be `SI_low` and `SI_high`.\n#'\n#' @return\n#'\n#' A data frame with columns corresponding to confidence intervals reshaped\n#' either to wide or long format.\n#'\n#' @examples\n#' x <- data.frame(\n#'   Parameter = c(\"Term 1\", \"Term 2\", \"Term 1\", \"Term 2\"),\n#'   CI = c(0.8, 0.8, 0.9, 0.9),\n#'   CI_low = c(0.2, 0.3, 0.1, 0.15),\n#'   CI_high = c(0.5, 0.6, 0.8, 0.85),\n#'   stringsAsFactors = FALSE\n#' )\n#'\n#' reshape_ci(x)\n#' reshape_ci(reshape_ci(x))\n#' @export\n\nreshape_ci <- function(x, ci_type = \"CI\") {\n  # define interval type\n  ci_type <- match.arg(ci_type, choices = c(\"CI\", \"SI\", \"HDI\", \"ETI\"))\n\n  ci_low <- paste0(ci_type, \"_low\")\n  ci_high <- paste0(ci_type, \"_high\")\n\n  # Long to wide ----------------\n  if (ci_low %in% names(x) && ci_high %in% names(x) && \"CI\" %in% names(x)) {\n    ci_position <- which(names(x) == \"CI\")\n\n    # Reshape\n    if (length(unique(x$CI)) > 1) {\n      if (\"Parameter\" %in% names(x)) {\n        idvar <- \"Parameter\"\n        remove_parameter <- FALSE\n      } else if (is.null(attr(x, \"idvars\"))) {\n        idvar <- \"Parameter\"\n        x$Parameter <- NA\n        remove_parameter <- TRUE\n      } else {\n        idvar <- attr(x, \"idvars\")\n        remove_parameter <- FALSE\n      }\n\n      x <- stats::reshape(\n        x,\n    
    idvar = idvar,\n        timevar = \"CI\",\n        direction = \"wide\",\n        v.names = c(ci_low, ci_high),\n        sep = \"_\"\n      )\n      row.names(x) <- NULL\n      if (remove_parameter) x$Parameter <- NULL\n    }\n\n    # Replace at the right place\n    ci_colname <- names(x)[\n      grepl(paste0(ci_low, \"_*\"), names(x)) |\n        grepl(paste0(ci_high, \"_*\"), names(x))\n    ]\n    colnames_1 <- names(x)[0:(ci_position - 1)][\n      !names(x)[0:(ci_position - 1)] %in% ci_colname\n    ]\n    colnames_2 <- names(x)[!names(x) %in% c(ci_colname, colnames_1)]\n    x <- x[c(colnames_1, ci_colname, colnames_2)]\n\n    # Wide to long --------------\n  } else {\n    if (\"Parameter\" %in% names(x)) {\n      remove_parameter <- FALSE\n    } else {\n      x$Parameter <- seq_len(nrow(x))\n      remove_parameter <- TRUE\n    }\n\n    lows <- grepl(paste0(ci_low, \"_*\"), names(x))\n    highs <- grepl(paste0(ci_high, \"_*\"), names(x))\n    ci <- as.numeric(gsub(paste0(ci_low, \"_\"), \"\", names(x)[lows]))\n    if (\n      paste(ci, collapse = \"-\") !=\n        paste(gsub(paste0(ci_high, \"_\"), \"\", names(x)[highs]), collapse = \"-\")\n    ) {\n      insight::format_error(\"Something went wrong in the CIs reshaping.\")\n      return(x)\n    }\n    if (sum(lows) > 1 && sum(highs) > 1) {\n      low <- stats::reshape(\n        x[!highs],\n        direction = \"long\",\n        varying = list(names(x)[lows]),\n        sep = \"_\",\n        timevar = \"CI\",\n        v.names = ci_low,\n        times = ci\n      )\n      high <- stats::reshape(\n        x[!lows],\n        direction = \"long\",\n        varying = list(names(x)[highs]),\n        sep = \"_\",\n        timevar = \"CI\",\n        v.names = ci_high,\n        times = ci\n      )\n      x <- merge(low, high)\n      x$id <- NULL\n      x <- x[order(x$Parameter), ]\n      row.names(x) <- NULL\n      if (remove_parameter) x$Parameter <- NULL\n    }\n\n    # Replace at the right place\n    ci_position <- 
which(lows)[1]\n    ci_colname <- c(\"CI\", ci_low, ci_high)\n    colnames_1 <- names(x)[0:(ci_position - 1)][\n      !names(x)[0:(ci_position - 1)] %in% ci_colname\n    ]\n    colnames_2 <- names(x)[!names(x) %in% c(ci_colname, colnames_1)]\n    x <- x[c(colnames_1, ci_colname, colnames_2)]\n  }\n\n  class(x) <- intersect(c(\"data.frame\", \"numeric\"), class(x))\n  x\n}\n"
  },
  {
    "path": "R/row_count.R",
    "content": "#' @title Count specific values row-wise\n#' @name row_count\n#' @description `row_count()` mimics base R's `rowSums()`, with sums for a\n#' specific value indicated by `count`. Hence, it is similar to\n#' `rowSums(x == count, na.rm = TRUE)`, but offers some more options, including\n#' strict comparisons. Comparisons using `==` coerce values to atomic vectors,\n#' thus both `2 == 2` and `\"2\" == 2` are `TRUE`. In `row_count()`, it is also\n#' possible to make \"type safe\" comparisons using the `allow_coercion` argument,\n#' where `\"2\" == 2` is not true.\n#'\n#' @param data A data frame with at least two columns, where number of specific\n#' values are counted row-wise.\n#' @param count The value for which the row sum should be computed. May be a\n#' numeric value, a character string (for factors or character vectors), `NA` or\n#' `Inf`.\n#' @param allow_coercion Logical. If `FALSE`, `count` matches only values of same\n#' class (i.e. when `count = 2`, the value `\"2\"` is not counted and vice versa).\n#' By default, when `allow_coercion = TRUE`, `count = 2` also matches `\"2\"`. In\n#' order to count factor levels in the data, use `count = factor(\"level\")`. 
See\n#' 'Examples'.\n#'\n#' @inheritParams extract_column_names\n#' @inheritParams row_means\n#'\n#' @return A vector with row-wise counts of values specified in `count`.\n#'\n#' @examples\n#' dat <- data.frame(\n#'   c1 = c(1, 2, NA, 4),\n#'   c2 = c(NA, 2, NA, 5),\n#'   c3 = c(NA, 4, NA, NA),\n#'   c4 = c(2, 3, 7, 8)\n#' )\n#'\n#' # count all 4s per row\n#' row_count(dat, count = 4)\n#' # count all missing values per row\n#' row_count(dat, count = NA)\n#'\n#' dat <- data.frame(\n#'   c1 = c(\"1\", \"2\", NA, \"3\"),\n#'   c2 = c(NA, \"2\", NA, \"3\"),\n#'   c3 = c(NA, 4, NA, NA),\n#'   c4 = c(2, 3, 7, Inf)\n#' )\n#' # count all 2s and \"2\"s per row\n#' row_count(dat, count = 2)\n#' # only count 2s, but not \"2\"s\n#' row_count(dat, count = 2, allow_coercion = FALSE)\n#'\n#' dat <- data.frame(\n#'   c1 = factor(c(\"1\", \"2\", NA, \"3\")),\n#'   c2 = c(\"2\", \"1\", NA, \"3\"),\n#'   c3 = c(NA, 4, NA, NA),\n#'   c4 = c(2, 3, 7, Inf)\n#' )\n#' # find only character \"2\"s\n#' row_count(dat, count = \"2\", allow_coercion = FALSE)\n#' # find only factor level \"2\"s\n#' row_count(dat, count = factor(\"2\"), allow_coercion = FALSE)\n#'\n#' @export\nrow_count <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  count = NULL,\n  allow_coercion = TRUE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  if (is.null(count)) {\n    insight::format_error(\n      \"`count` must be a valid value (including `NA` or `Inf`), but not `NULL`.\"\n    )\n  }\n\n  if (is.null(select) || length(select) == 0) {\n    insight::format_error(\"No columns selected.\")\n  }\n\n  data <- .coerce_to_dataframe(data[select])\n\n  # check if we have a data framme with at least two columns\n  if (nrow(data) < 1) {\n    insight::format_error(\"`data` must be a data frame with at least one row.\")\n  }\n\n  
# check if we have a data frame with at least two columns\n  if (ncol(data) < 2) {\n    insight::format_error(\n      \"`data` must be a data frame with at least two numeric columns.\"\n    )\n  }\n  # special case: count missing\n  if (is.na(count)) {\n    rowSums(is.na(data))\n  } else {\n    # comparisons in R using == coerce values into an atomic vector, i.e.\n    # 2 == \"2\" is TRUE. If `allow_coercion = FALSE`, we only want 2 == 2 or\n    # \"2\" == \"2\" (i.e. we want exact types to be compared only)\n    if (isFALSE(allow_coercion)) {\n      # we need the \"type\" of the count-value - we use class() instead of typeof(),\n      # because the latter sometimes returns unsuitable classes/types. compare\n      # typeof(as.Date(\"2020-01-01\")), which returns \"double\".\n      count_type <- class(count)[1]\n      valid_columns <- vapply(data, inherits, TRUE, what = count_type)\n      # check if any columns left?\n      if (!any(valid_columns)) {\n        insight::format_error(\n          \"No column has same type as the value provided in `count`. Set `allow_coercion = TRUE` or specify a valid value for `count`.\"\n        ) # nolint\n      }\n      data <- data[valid_columns]\n    }\n    # coerce - we have only valid columns anyway, and we need to coerce factors\n    # to vectors, else comparison with `==` errors.\n    count <- as.vector(count)\n    # finally, count\n    rowSums(data == count, na.rm = TRUE)\n  }\n}\n"
  },
  {
    "path": "R/row_means.R",
    "content": "#' @title Row means or sums (optionally with minimum amount of valid values)\n#' @name row_means\n#' @description This function is similar to the SPSS `MEAN.n` or `SUM.n`\n#' function and computes row means or row sums from a data frame or matrix if at\n#' least `min_valid` values of a row are valid (and not `NA`).\n#'\n#' @param data A data frame with at least two columns, where row means or row\n#' sums are applied.\n#' @param min_valid Optional, a numeric value of length 1. May either be\n#' - a numeric value that indicates the amount of valid values per row to\n#'   calculate the row mean or row sum;\n#' - or a value between `0` and `1`, indicating a proportion of valid values per\n#'   row to calculate the row mean or row sum (see 'Details').\n#' - `NULL` (default), in which all cases are considered.\n#'\n#' If a row's sum of valid values is less than `min_valid`, `NA` will be returned.\n#' @param digits Numeric value indicating the number of decimal places to be\n#' used for rounding mean values. Negative values are allowed (see 'Details').\n#' By default, `digits = NULL` and no rounding is used.\n#' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) values\n#' before calculating row means or row sums. Only applies if `min_valid` is not\n#' specified.\n#' @param verbose Toggle warnings.\n#' @inheritParams extract_column_names\n#'\n#' @return A vector with row means (for `row_means()`) or row sums (for\n#' `row_sums()`) for those rows with at least `n` valid values.\n#'\n#' @details Rounding to a negative number of `digits` means rounding to a power\n#' of ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest\n#' hundred. For `min_valid`, if not `NULL`, `min_valid` must be a numeric value\n#' from `0` to `ncol(data)`. If a row in the data frame has at least `min_valid`\n#' non-missing values, the row mean or row sum is returned. 
If `min_valid` is a\n#' non-integer value from 0 to 1, `min_valid` is considered to indicate the\n#' proportion of required non-missing values per row. E.g., if\n#' `min_valid = 0.75`, a row must have at least `ncol(data) * min_valid`\n#' non-missing values for the row mean or row sum to be calculated. See\n#' 'Examples'.\n#'\n#' @examples\n#' dat <- data.frame(\n#'   c1 = c(1, 2, NA, 4),\n#'   c2 = c(NA, 2, NA, 5),\n#'   c3 = c(NA, 4, NA, NA),\n#'   c4 = c(2, 3, 7, 8)\n#' )\n#'\n#' # default, all means are shown, if no NA values are present\n#' row_means(dat)\n#'\n#' # remove all NA before computing row means\n#' row_means(dat, remove_na = TRUE)\n#'\n#' # needs at least 4 non-missing values per row\n#' row_means(dat, min_valid = 4) # 1 valid return value\n#' row_sums(dat, min_valid = 4) # 1 valid return value\n#'\n#' # needs at least 3 non-missing values per row\n#' row_means(dat, min_valid = 3) # 2 valid return values\n#'\n#' # needs at least 2 non-missing values per row\n#' row_means(dat, min_valid = 2)\n#'\n#' # needs at least 1 non-missing value per row, for two selected variables\n#' row_means(dat, select = c(\"c1\", \"c3\"), min_valid = 1)\n#'\n#' # needs at least 50% of non-missing values per row\n#' row_means(dat, min_valid = 0.5) # 3 valid return values\n#' row_sums(dat, min_valid = 0.5)\n#'\n#' # needs at least 75% of non-missing values per row\n#' row_means(dat, min_valid = 0.75) # 2 valid return values\n#'\n#' @export\nrow_means <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  min_valid = NULL,\n  digits = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  remove_na = FALSE,\n  verbose = TRUE\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # prepare data, sanity checks\n  data <- .prepare_row_data(data, select, min_valid, verbose)\n\n  # calculate row means\n  .row_sums_or_means(data, min_valid, digits, 
remove_na, fun = \"mean\")\n}\n\n\n#' @rdname row_means\n#' @export\nrow_sums <- function(\n  data,\n  select = NULL,\n  exclude = NULL,\n  min_valid = NULL,\n  digits = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  remove_na = FALSE,\n  verbose = TRUE\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    data,\n    exclude,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # prepare data, sanity checks\n  data <- .prepare_row_data(data, select, min_valid, verbose)\n\n  # calculate row sums\n  .row_sums_or_means(data, min_valid, digits, remove_na, fun = \"sum\")\n}\n\n\n# helper ------------------------\n\n# calculate row means or sums\n.row_sums_or_means <- function(data, min_valid, digits, remove_na, fun) {\n  if (is.null(min_valid)) {\n    # calculate row means or sums for complete data\n    out <- switch(\n      fun,\n      mean = rowMeans(data, na.rm = remove_na),\n      rowSums(data, na.rm = remove_na)\n    )\n  } else {\n    # is 'min_valid' indicating a proportion?\n    decimals <- min_valid %% 1\n    if (decimals != 0) {\n      min_valid <- round(ncol(data) * decimals)\n    }\n\n    # min_valid may not be larger as df's amount of columns\n    if (ncol(data) < min_valid) {\n      insight::format_error(\n        \"`min_valid` must be smaller or equal to number of columns in data frame.\"\n      )\n    }\n\n    # row means or sums\n    to_na <- rowSums(is.na(data)) > ncol(data) - min_valid\n    out <- switch(\n      fun,\n      mean = rowMeans(data, na.rm = TRUE),\n      rowSums(data, na.rm = TRUE)\n    )\n    out[to_na] <- NA\n  }\n\n  # round, if requested\n  if (!is.null(digits) && !all(is.na(digits))) {\n    out <- round(out, digits = digits)\n  }\n\n  out\n}\n\n\n# check that data is in shape for row means or row sums\n.prepare_row_data <- function(data, select, min_valid, verbose) {\n  if (is.null(select) || length(select) == 0) {\n    insight::format_error(\"No columns selected.\")\n  }\n\n  
data <- .coerce_to_dataframe(data[select])\n\n  # n must be a numeric, non-missing value\n  if (\n    !is.null(min_valid) &&\n      (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)\n  ) {\n    insight::format_error(\"`min_valid` must be a numeric value of length 1.\")\n  }\n\n  # make sure we only have numeric values\n  numeric_columns <- vapply(data, is.numeric, TRUE)\n  if (!all(numeric_columns)) {\n    if (verbose) {\n      insight::format_alert(\n        \"Only numeric columns are considered for calculation.\"\n      )\n    }\n    data <- data[numeric_columns]\n  }\n\n  # check if we have a data frame with at least two columns\n  if (ncol(data) < 2) {\n    insight::format_error(\n      \"`data` must be a data frame with at least two numeric columns.\"\n    )\n  }\n\n  data\n}\n"
  },
  {
    "path": "R/select_nse.R",
    "content": "# Code adapted from {poorman} by Nathan Eastwood [License: MIT]\n# https://github.com/nathaneastwood/poorman/blob/master/R/select_positions.R\n\n.select_nse <- function(\n  select,\n  data,\n  exclude,\n  ignore_case,\n  regex = FALSE,\n  remove_group_var = FALSE,\n  allow_rename = FALSE,\n  verbose = FALSE,\n  ifnotfound = \"warn\"\n) {\n  .check_data(data)\n  columns <- colnames(data)\n\n  # avoid conflicts\n  conflicting_packages <- .conflicting_packages(\"poorman\")\n  on.exit(.attach_packages(conflicting_packages))\n\n  expr_select <- substitute(select, env = parent.frame(1L))\n  expr_exclude <- substitute(exclude, env = parent.frame(1L))\n\n  # when exclude is not an argument called from the function (e.g data_to_long),\n  # do not consider \"exclude\" as a symbol\n  if (deparse(expr_exclude) == \"exclude\" && is.null(substitute(exclude))) {\n    expr_exclude <- NULL\n  }\n\n  # for grouped data frames, we can decide to remove group variable from selection\n  grp_vars <- setdiff(colnames(attr(data, \"groups\", exact = TRUE)), \".rows\")\n\n  # directly return all names if select == exclude == NULL\n  if (is.null(expr_select) && is.null(expr_exclude)) {\n    # don't include grouping variables\n    if (remove_group_var) {\n      columns <- setdiff(columns, grp_vars)\n    }\n    return(columns)\n  }\n\n  # get the position of columns that are selected or excluded\n  selected <- .eval_expr(\n    expr_select,\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n  excluded <- .eval_expr(\n    expr_exclude,\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n\n  selected_has_mix_idx <- any(selected < 0L) && any(selected > 0L)\n  excluded_has_mix_idx <- any(excluded < 0L) && any(excluded > 0L)\n\n  if (selected_has_mix_idx || excluded_has_mix_idx) {\n    insight::format_error(\n      \"You can't mix negative and 
positive indices in `select` or `exclude`.\"\n    )\n  }\n\n  # variable positions -> variable names\n  selected <- columns[selected]\n  excluded <- columns[excluded]\n\n  if (length(selected) == 0L) {\n    if (length(excluded) == 0L) {\n      out <- character(0L)\n    } else {\n      out <- setdiff(columns, excluded)\n    }\n  } else {\n    out <- setdiff(selected, excluded)\n  }\n\n  # don't include grouping variables\n  if (remove_group_var && length(out)) {\n    out <- setdiff(out, grp_vars)\n  }\n\n  # for named character vectors, we offer the service to rename the columns\n  if (allow_rename && typeof(expr_select) == \"language\") {\n    # safe evaluation of the expression, to get the named vector from \"select\"\n    new_names <- tryCatch(eval(expr_select), error = function(e) NULL)\n    # check if we really have a named vector\n    if (!is.null(new_names) && !is.null(names(new_names))) {\n      # if so, copy names\n      all_names <- names(new_names)\n      # if some of the elements don't have a name, we set the value as name\n      names(new_names)[!nzchar(all_names)] <- new_names[!nzchar(all_names)]\n      # after inclusion and exclusion, the original values in \"select\"\n      # may have changed, so we check that we only add names of valid values\n      out <- stats::setNames(out, names(new_names)[new_names %in% out])\n      # check if we have any duplicated names, and if so, give an error\n      if (anyDuplicated(names(out)) > 0) {\n        insight::format_error(paste0(\n          \"Following names are duplicated after renaming: \",\n          text_concatenate(names(out)[duplicated(names(out))], enclose = \"`\"),\n          \". Using duplicated names is no good practice and therefore discouraged. 
Please provide unique names.\"\n        ))\n      }\n    }\n  }\n\n  out\n}\n\n\n# This is where we dispatch the expression to several helper functions.\n# This function is called multiple times for expressions that are composed\n# of several symbols/language.\n#\n# Ex:\n# * \"cyl\" -> will go to .select_char() and will return directly\n# * cyl:gear -> function (`:`) so find which function it is, then get the\n#   position for each variable, then evaluate the function with the positions\n\n.eval_expr <- function(x, data, ignore_case, regex, verbose, ifnotfound) {\n  if (is.null(x)) {\n    return(NULL)\n  }\n\n  type <- typeof(x)\n\n  out <- switch(\n    type,\n    integer = x,\n    double = as.integer(x),\n    character = .select_char(\n      data,\n      x,\n      ignore_case,\n      regex = regex,\n      verbose,\n      ifnotfound\n    ),\n    symbol = .select_symbol(\n      data,\n      x,\n      ignore_case,\n      regex = regex,\n      verbose,\n      ifnotfound\n    ),\n    language = .eval_call(\n      data,\n      x,\n      ignore_case,\n      regex = regex,\n      verbose,\n      ifnotfound\n    ),\n    insight::format_error(paste0(\n      \"Expressions of type <\",\n      typeof(x),\n      \"> cannot be evaluated for use when subsetting.\"\n    ))\n  )\n\n  out\n}\n\n\n# Possibilities:\n# - quoted variable name\n# - quoted variable name with ignore case\n# - quoted variable name with colon, to indicate range\n# - character that should be regex-ed on variable names\n# - special word \"all\" to return all vars\n\n.select_char <- function(data, x, ignore_case, regex, verbose, ifnotfound) {\n  # use colnames because names() doesn't work for matrices\n  columns <- colnames(data)\n  if (isTRUE(regex)) {\n    # string is a regular expression\n    grep(x, columns)\n  } else if (length(x) == 1L && x == \"all\") {\n    # string is \"all\" - select all columns\n    seq_along(data)\n  } else if (any(grepl(\":\", x, fixed = TRUE))) {\n    # special pattern, as string 
(e.g.select = c(\"cyl:hp\", \"am\")). However,\n    # this will first go into `.eval_call()` and thus only single elements\n    # are passed in `x` - we have never a character *vector* here\n    # check for valid names\n    colon_vars <- unlist(strsplit(x, \":\", fixed = TRUE))\n    colon_match <- match(colon_vars, columns)\n    if (anyNA(colon_match)) {\n      .action_if_not_found(\n        colon_vars,\n        columns,\n        colon_match,\n        verbose,\n        ifnotfound\n      )\n      matches <- NA\n    } else {\n      start_pos <- match(colon_vars[1], columns)\n      end_pos <- match(colon_vars[2], columns)\n      if (!is.na(start_pos) && !is.na(end_pos)) {\n        matches <- start_pos:end_pos\n      } else {\n        matches <- NA\n      }\n    }\n    matches[!is.na(matches)]\n  } else if (isTRUE(ignore_case)) {\n    # find columns, case insensitive\n    matches <- match(toupper(x), toupper(columns))\n    matches[!is.na(matches)]\n  } else {\n    # find columns, case sensitive\n    matches <- match(x, columns)\n    if (anyNA(matches)) {\n      .action_if_not_found(x, columns, matches, verbose, ifnotfound)\n    }\n    matches[!is.na(matches)]\n  }\n}\n\n# small helper, to avoid duplicated code\n\n.action_if_not_found <- function(x, columns, matches, verbose, ifnotfound) {\n  msg <- paste0(\n    \"Following variable(s) were not found: \",\n    toString(x[is.na(matches)])\n  )\n  msg2 <- .misspelled_string(\n    columns,\n    x[is.na(matches)],\n    default_message = \"Possibly misspelled?\"\n  )\n  if (ifnotfound == \"error\") {\n    insight::format_error(msg, msg2)\n  }\n  if (ifnotfound == \"warn\" && verbose) {\n    insight::format_warning(msg, msg2)\n  }\n}\n\n\n# 3 types of symbols:\n# - unquoted variables\n# - objects that need to be evaluated, e.g data_find(iris, i) where\n#   i is a function arg or is defined before. 
This can also be a\n#   vector of names or positions.\n# - functions (without parenthesis)\n\n# The first case is easy to deal with.\n# For the 2nd one, we try to get the value of the object at each environment\n# (starting from the lower one) until the global environment. If we get its\n# value but it errors because the function doesn't exist then it means that\n# it is a select helper that we grab from the error message.\n\n.select_symbol <- function(data, x, ignore_case, regex, verbose, ifnotfound) {\n  # We use `tryCatch()` instead of `try()` here, because for grouped data frame\n  # methods, `.dynEval()` can be called many times. Since `tryCatch()` is minimal\n  # faster than `try()`, we get a performance \"boost\" of some seconds for large\n  # data frames with many groups (see https://github.com/easystats/datawizard/pull/657/)\n  try_eval <- tryCatch(eval(x), error = function(e) NULL)\n  x_dep <- insight::safe_deparse(x)\n  is_select_helper <- FALSE\n  out <- NULL\n\n  if (x_dep %in% colnames(data)) {\n    matches <- match(x_dep, colnames(data))\n    out <- matches[!is.na(matches)]\n  } else if (isTRUE(ignore_case)) {\n    matches <- match(toupper(x_dep), toupper(colnames(data)))\n    out <- matches[!is.na(matches)]\n  } else {\n    new_expr <- tryCatch(\n      .dynGet(x, inherits = FALSE, minframe = 0L),\n      error = function(e) {\n        # if starts_with() et al. don't exist\n        fn <- insight::safe_deparse(e$call)\n\n        # if starts_with() et al. 
come from tidyselect but need to be used in\n        # a select environment, then the error doesn't have the same structure.\n        if (\n          is.null(fn) && grepl(\"must be used within a\", e$message, fixed = TRUE)\n        ) {\n          call_trace <- lapply(e$trace$call, function(x) {\n            tmp <- insight::safe_deparse(x)\n            if (grepl(paste0(\"^\", .regex_select_helper()), tmp)) {\n              tmp\n            }\n          })\n          fn <- Filter(Negate(is.null), call_trace)[1]\n        }\n        # if we actually obtain the select helper call, return it, else return\n        # what we already had\n        if (length(fn) > 0L && grepl(.regex_select_helper(), fn)) {\n          is_select_helper <<- TRUE\n          return(fn)\n        }\n        NULL\n      }\n    )\n\n    # when \"x\" is a function arg which is itself a function call to evaluate,\n    # .dynGet can return \"x\" infinitely so we try to evaluate this arg\n    # see #414\n    if (!is.null(new_expr) && insight::safe_deparse(new_expr) == \"x\") {\n      new_expr <- .dynEval(\n        x,\n        inherits = FALSE,\n        minframe = 0L,\n        remove_n_top_env = 4\n      )\n    }\n\n    if (is_select_helper) {\n      new_expr <- str2lang(unlist(new_expr, use.names = FALSE))\n      out <- .eval_expr(\n        new_expr,\n        data = data,\n        ignore_case = ignore_case,\n        regex = regex,\n        verbose = verbose,\n        ifnotfound = ifnotfound\n      )\n    } else if (length(new_expr) == 1L && is.function(new_expr)) {\n      out <- which(vapply(data, new_expr, FUN.VALUE = logical(1L)))\n    } else {\n      out <- unlist(\n        lapply(\n          new_expr,\n          .eval_expr,\n          data = data,\n          ignore_case = ignore_case,\n          regex = regex,\n          verbose = verbose,\n          ifnotfound = ifnotfound\n        ),\n        use.names = FALSE\n      )\n    }\n  }\n\n  # sometimes an object that needs to be evaluated has the same 
name as a\n  # function (e.g `colnames`). Vector of names have the priority on functions\n  # so function evaluation is delayed at the max.\n  if (is.null(out) && is.function(try_eval)) {\n    cols <- names(data)\n    out <- which(vapply(data, x, FUN.VALUE = logical(1L)))\n  }\n\n  out\n}\n\n# Dispatch expressions to various select helpers according to the function call.\n\n.eval_call <- function(data, x, ignore_case, regex, verbose, ifnotfound) {\n  type <- insight::safe_deparse(x[[1]])\n  switch(\n    type,\n    `:` = .select_seq(x, data, ignore_case, regex, verbose, ifnotfound),\n    `-` = .select_minus(x, data, ignore_case, regex, verbose, ifnotfound),\n    `c` = .select_c(x, data, ignore_case, regex, verbose, ifnotfound), # nolint\n    `(` = .select_bracket(x, data, ignore_case, regex, verbose, ifnotfound),\n    `[` = .select_square_bracket(\n      x,\n      data,\n      ignore_case,\n      regex,\n      verbose,\n      ifnotfound\n    ),\n    `$` = .select_dollar(x, data, ignore_case, regex, verbose, ifnotfound),\n    `~` = .select_tilde(x, data, ignore_case, regex, verbose, ifnotfound),\n    list = .select_list(x, data, ignore_case, regex, verbose, ifnotfound),\n    names = .select_names(x, data, ignore_case, regex, verbose, ifnotfound),\n    starts_with = ,\n    ends_with = ,\n    matches = ,\n    contains = ,\n    regex = .select_helper(x, data, ignore_case, regex, verbose, ifnotfound),\n    .select_context(x, data, ignore_case, regex, verbose, ifnotfound)\n  )\n}\n\n# e.g 1:3, or gear:cyl\n.select_seq <- function(expr, data, ignore_case, regex, verbose, ifnotfound) {\n  x <- .eval_expr(\n    expr[[2]],\n    data = data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n  y <- .eval_expr(\n    expr[[3]],\n    data = data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n  x:y\n}\n\n# e.g -cyl\n.select_minus <- function(expr, data, 
ignore_case, regex, verbose, ifnotfound) {\n  x <- .eval_expr(\n    expr[[2]],\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n  if (length(x) == 0L) {\n    seq_along(data)\n  } else {\n    x * -1L\n  }\n}\n\n# e.g c(\"gear\", \"cyl\")\n.select_c <- function(expr, data, ignore_case, regex, verbose, ifnotfound) {\n  lst_expr <- as.list(expr)\n  lst_expr[[1]] <- NULL\n  unlist(\n    lapply(\n      lst_expr,\n      .eval_expr,\n      data,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      ifnotfound = ifnotfound\n    ),\n    use.names = FALSE\n  )\n}\n\n# e.g -(gear:cyl)\n.select_bracket <- function(\n  expr,\n  data,\n  ignore_case,\n  regex,\n  verbose,\n  ifnotfound\n) {\n  .eval_expr(\n    expr[[2]],\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n}\n\n# e.g myvector[3]\n.select_square_bracket <- function(\n  expr,\n  data,\n  ignore_case,\n  regex,\n  verbose,\n  ifnotfound\n) {\n  first_obj <- .eval_expr(\n    expr[[2]],\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n  .eval_expr(\n    first_obj[eval(expr[[3]])],\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n}\n\n.select_names <- function(expr, data, ignore_case, regex, verbose, ifnotfound) {\n  first_obj <- .dynEval(expr, inherits = FALSE, minframe = 0L)\n  .eval_expr(\n    first_obj,\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = FALSE,\n    ifnotfound = ifnotfound\n  )\n}\n\n# e.g starts_with(\"Sep\")\n.select_helper <- function(\n  expr,\n  data,\n  ignore_case,\n  regex,\n  verbose,\n  ifnotfound\n) {\n  lst_expr <- as.list(expr)\n\n  # need this if condition to distinguish between starts_with(\"Sep\") (that we\n  # can use directly) and 
starts_with(i) (where we need to get i)\n  if (length(lst_expr) == 2L && typeof(lst_expr[[2]]) == \"symbol\") {\n    collapsed_patterns <- .dynGet(\n      lst_expr[[2]],\n      inherits = FALSE,\n      minframe = 0L\n    )\n  } else {\n    collapsed_patterns <- paste(\n      unlist(lst_expr[2:length(lst_expr)]),\n      collapse = \"|\"\n    )\n  }\n\n  helper <- insight::safe_deparse(lst_expr[[1]])\n\n  rgx <- switch(\n    helper,\n    starts_with = paste0(\"^(\", collapsed_patterns, \")\"),\n    ends_with = paste0(\"(\", collapsed_patterns, \")$\"),\n    contains = paste0(\"(\", collapsed_patterns, \")\"),\n    regex = collapsed_patterns,\n    insight::format_error(\"There is no select helper called '\", helper, \"'.\")\n  )\n  # starting in R 4.5, grep() errors if some logical args have NULL/NA\n  if (is.null(ignore_case)) {\n    ignore_case <- FALSE\n  }\n  grep(rgx, colnames(data), ignore.case = ignore_case)\n}\n\n# e.g args$select (happens when we use grouped_data (see center.grouped_df()))\n.select_dollar <- function(\n  expr,\n  data,\n  ignore_case,\n  regex,\n  verbose,\n  ifnotfound\n) {\n  first_obj <- .dynGet(\n    expr[[2]],\n    ifnotfound = NULL,\n    inherits = FALSE,\n    minframe = 0L\n  )\n  if (is.null(first_obj)) {\n    first_obj <- .dynEval(expr[[2]], inherits = FALSE, minframe = 0L)\n  }\n  .eval_expr(\n    first_obj[[deparse(expr[[3]])]],\n    data,\n    ignore_case = ignore_case,\n    regex = regex,\n    verbose = verbose,\n    ifnotfound = ifnotfound\n  )\n}\n\n# e.g ~ gear + cyl\n.select_tilde <- function(expr, data, ignore_case, regex, verbose, ifnotfound) {\n  vars <- all.vars(expr)\n  unlist(\n    lapply(\n      vars,\n      .eval_expr,\n      data = data,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      ifnotfound = ifnotfound\n    ),\n    use.names = FALSE\n  )\n}\n\n# e.g list(gear = 4, cyl = 5)\n.select_list <- function(expr, data, ignore_case, regex, verbose, ifnotfound) {\n  vars <- 
names(.dynEval(expr, inherits = FALSE, minframe = 0L))\n  unlist(\n    lapply(\n      vars,\n      .eval_expr,\n      data = data,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      ifnotfound = ifnotfound\n    ),\n    use.names = FALSE\n  )\n}\n\n# e.g is.numeric()\n.select_context <- function(\n  expr,\n  data,\n  ignore_case,\n  regex,\n  verbose,\n  ifnotfound\n) {\n  x_dep <- insight::safe_deparse(expr)\n  if (endsWith(x_dep, \"()\")) {\n    new_expr <- gsub(\"\\\\(\\\\)$\", \"\", x_dep)\n    new_expr <- str2lang(new_expr)\n    .eval_expr(\n      new_expr,\n      data = data,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      ifnotfound = ifnotfound\n    )\n  } else {\n    out <- .dynEval(expr, inherits = FALSE, minframe = 0L)\n    .eval_expr(\n      out,\n      data = data,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      ifnotfound = ifnotfound\n    )\n  }\n}\n\n# -------------------------------------\n\n.check_data <- function(data) {\n  if (is.null(data)) {\n    insight::format_error(\"The `data` argument must be provided.\")\n  }\n  .coerce_to_dataframe(data)\n}\n\n.regex_select_helper <- function() {\n  \"(starts\\\\_with|ends\\\\_with|col\\\\_ends\\\\_with|contains|regex)\"\n}\n\n.conflicting_packages <- function(packages = NULL) {\n  if (is.null(packages)) {\n    packages <- \"poorman\"\n  }\n\n  namespace <- vapply(packages, isNamespaceLoaded, FUN.VALUE = logical(1L))\n  attached <- paste0(\"package:\", packages) %in% search()\n  attached <- stats::setNames(attached, packages)\n\n  for (i in packages) {\n    unloadNamespace(i)\n  }\n\n  list(package = packages, namespace = namespace, attached = attached)\n}\n\n.attach_packages <- function(packages = NULL) {\n  if (!is.null(packages)) {\n    pkg <- packages$package\n    for (i in seq_along(pkg)) {\n      if (isTRUE(packages$namespace[i])) {\n        loadNamespace(pkg[i])\n      }\n      
if (isTRUE(packages$attached[i])) {\n        suppressPackageStartupMessages(\n          suppressWarnings(\n            require(pkg[i], quietly = TRUE, character.only = TRUE)\n          )\n        )\n      }\n    }\n  }\n}\n\n# Almost identical to dynGet(). The difference is that we deparse the expression\n# because get0() allows symbol only since R 4.1.0\n.dynGet <- function(\n  x,\n  ifnotfound = stop(\n    gettextf(\"%s not found\", sQuote(x)),\n    domain = NA,\n    call. = FALSE\n  ),\n  minframe = 1L,\n  inherits = FALSE\n) {\n  x <- insight::safe_deparse(x)\n  n <- sys.nframe()\n  myObj <- structure(list(.b = as.raw(7)), foo = 47L)\n  while (n > minframe) {\n    n <- n - 1L\n    env <- sys.frame(n)\n    r <- get0(x, envir = env, inherits = inherits, ifnotfound = myObj)\n    if (!identical(r, myObj)) {\n      return(r)\n    }\n  }\n  ifnotfound\n}\n\n# Similar to .dynGet() but instead of getting an object from the environment,\n# we try to evaluate an expression. It stops as soon as the evaluation doesn't\n# error. Returns NULL if can never be evaluated.\n#\n# Custom arg \"remove_n_top_env\" to remove the first environments which are\n# \".select_nse()\" and the other custom functions.\n#\n# Arg \"data\" is here if we want to start searching in the data instead of the\n# lowest environment.\n.dynEval <- function(\n  x,\n  ifnotfound = stop(\n    gettextf(\"%s not found\", sQuote(x)),\n    domain = NA,\n    call. = FALSE\n  ),\n  minframe = 1L,\n  inherits = FALSE,\n  remove_n_top_env = 0,\n  data = NULL\n) {\n  iter <- 0\n  n <- sys.nframe() - remove_n_top_env\n  x <- insight::safe_deparse(x)\n  while (n > minframe) {\n    if (iter == 0 && !is.null(data)) {\n      env <- data\n      iter <- iter + 1\n    } else {\n      n <- n - 1L\n      env <- sys.frame(n)\n    }\n    # We use `tryCatch()` instead of `try()` here, because for grouped data frame\n    # methods, `.dynEval()` can be called many times. 
Since `tryCatch()` is minimal\n    # faster than `try()`, we get a performance \"boost\" of some seconds for large\n    # data frames with many groups (see https://github.com/easystats/datawizard/pull/657/)\n    r <- tryCatch(eval(str2lang(x), envir = env), error = function(e) NULL)\n    if (!is.null(r)) {\n      return(r)\n    }\n  }\n  ifnotfound\n}\n"
  },
  {
    "path": "R/skewness_kurtosis.R",
    "content": "#' Compute Skewness and (Excess) Kurtosis\n#'\n#' @param x A numeric vector or data.frame.\n#' @param type Type of algorithm for computing skewness. May be one of `1`\n#'   (or `\"1\"`, `\"I\"` or `\"classic\"`), `2` (or `\"2\"`,\n#'   `\"II\"` or `\"SPSS\"` or `\"SAS\"`) or `3` (or  `\"3\"`,\n#'   `\"III\"` or `\"Minitab\"`). See 'Details'.\n#' @param iterations The number of bootstrap replicates for computing standard\n#'   errors. If `NULL` (default), parametric standard errors are computed.\n#' @param test Logical, if `TRUE`, tests if skewness or kurtosis is\n#'   significantly different from zero.\n#' @param digits Number of decimal places.\n#' @param object An object returned by `skewness()` or `kurtosis()`.\n#' @param verbose Toggle warnings and messages.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams coef_var\n#'\n#' @details\n#'\n#' \\subsection{Skewness}{\n#' Symmetric distributions have a `skewness` around zero, while\n#' a negative skewness values indicates a \"left-skewed\" distribution, and a\n#' positive skewness values indicates a \"right-skewed\" distribution. 
Examples\n#' for the relationship of skewness and distributions are:\n#'\n#'   - Normal distribution (and other symmetric distribution) has a skewness\n#'   of 0\n#'   - Half-normal distribution has a skewness just below 1\n#'   - Exponential distribution has a skewness of 2\n#'   - Lognormal distribution can have a skewness of any positive value,\n#'   depending on its parameters\n#'\n#' (\\cite{https://en.wikipedia.org/wiki/Skewness})\n#' }\n#'\n#' \\subsection{Types of Skewness}{\n#' `skewness()` supports three different methods for estimating skewness,\n#' as discussed in \\cite{Joanes and Gill (1998)}:\n#'\n#' - Type \"1\" is the \"classical\" method, which is `g1 = (sum((x -\n#' mean(x))^3) / n) / (sum((x - mean(x))^2) / n)^1.5`\n#'\n#' - Type \"2\" first calculates the type-1 skewness, then adjusts the result:\n#' `G1 = g1 * sqrt(n * (n - 1)) / (n - 2)`. This is what SAS and SPSS\n#' usually return.\n#'\n#' - Type \"3\" first calculates the type-1 skewness, then adjusts the result:\n#' `b1 = g1 * ((1 - 1 / n))^1.5`. This is what Minitab usually returns.\n#' }\n#'\n#' \\subsection{Kurtosis}{\n#' The `kurtosis` is a measure of \"tailedness\" of a distribution. A\n#' distribution with a kurtosis values of about zero is called \"mesokurtic\". A\n#' kurtosis value larger than zero indicates a \"leptokurtic\" distribution with\n#' *fatter* tails. A kurtosis value below zero indicates a \"platykurtic\"\n#' distribution with *thinner* tails\n#' (\\cite{https://en.wikipedia.org/wiki/Kurtosis}).\n#' }\n#'\n#' \\subsection{Types of Kurtosis}{\n#' `kurtosis()` supports three different methods for estimating kurtosis,\n#' as discussed in \\cite{Joanes and Gill (1998)}:\n#'\n#' - Type \"1\" is the \"classical\" method, which is `g2 = n * sum((x -\n#' mean(x))^4) / (sum((x - mean(x))^2)^2) - 3`.\n#'\n#' - Type \"2\" first calculates the type-1 kurtosis, then adjusts the result:\n#' `G2 = ((n + 1) * g2 + 6) * (n - 1)/((n - 2) * (n - 3))`. 
This is what\n#' SAS and SPSS usually return\n#'\n#' - Type \"3\" first calculates the type-1 kurtosis, then adjusts the result:\n#' `b2 = (g2 + 3) * (1 - 1 / n)^2 - 3`. This is what Minitab usually\n#' returns.\n#'\n#' }\n#'\n#' \\subsection{Standard Errors}{\n#' It is recommended to compute empirical (bootstrapped) standard errors (via\n#' the `iterations` argument) than relying on analytic standard errors\n#' (\\cite{Wright & Herrington, 2011}).\n#' }\n#'\n#' @references\n#'\n#' - D. N. Joanes and C. A. Gill (1998). Comparing measures of sample\n#'   skewness and kurtosis. The Statistician, 47, 183–189.\n#'\n#' - Wright, D. B., & Herrington, J. A. (2011). Problematic standard\n#'   errors and confidence intervals for skewness and kurtosis. Behavior\n#'   research methods, 43(1), 8-17.\n#'\n#' @return Values of skewness or kurtosis.\n#'\n#' @examples\n#' skewness(rnorm(1000))\n#' kurtosis(rnorm(1000))\n#' @export\nskewness <- function(x, ...) {\n  UseMethod(\"skewness\")\n}\n\n\n# skewness -----------------------------------------\n\n#' @rdname skewness\n#' @export\nskewness.numeric <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  verbose = TRUE,\n  ...\n) {\n  if (remove_na) {\n    x <- x[!is.na(x)]\n  }\n  n <- length(x)\n  out <- (sum((x - mean(x))^3) / n) / (sum((x - mean(x))^2) / n)^1.5\n\n  type <- .check_skewness_type(type)\n\n  if (type == \"2\" && n < 3) {\n    if (verbose) {\n      insight::format_warning(\n        \"Need at least 3 complete observations for type-2-skewness. 
Using 'type=\\\"1\\\"' now.\"\n      )\n    }\n    type <- \"1\"\n  }\n\n  .skewness <- switch(\n    type,\n    \"1\" = out,\n    \"2\" = out * sqrt(n * (n - 1)) / (n - 2),\n    \"3\" = out * ((1 - 1 / n))^1.5\n  )\n\n  out_se <- sqrt((6 * (n - 2)) / ((n + 1) * (n + 3)))\n\n  .skewness_se <- switch(\n    type,\n    \"1\" = out_se,\n    \"2\" = out_se * ((sqrt(n * (n - 1))) / (n - 2)),\n    \"3\" = out_se * (((n - 1) / n)^1.5),\n  )\n\n  if (!is.null(iterations)) {\n    if (requireNamespace(\"boot\", quietly = TRUE)) {\n      results <- boot::boot(\n        data = x,\n        statistic = .boot_skewness,\n        R = iterations,\n        remove_na = remove_na,\n        type = type\n      )\n      out_se <- stats::sd(results$t, na.rm = TRUE)\n    } else {\n      insight::format_warning(\"Package 'boot' needed for bootstrapping SEs.\")\n    }\n  }\n\n  .skewness <- data.frame(\n    Skewness = .skewness,\n    SE = out_se\n  )\n  class(.skewness) <- unique(c(\"parameters_skewness\", class(.skewness)))\n  .skewness\n}\n\n\n#' @export\nskewness.matrix <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  .skewness <- apply(\n    x,\n    2,\n    skewness,\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n\n  .names <- colnames(x)\n\n  if (length(.names) == 0) {\n    .names <- paste0(\"X\", seq_len(ncol(x)))\n  }\n\n  .skewness <- cbind(Parameter = .names, do.call(rbind, .skewness))\n\n  class(.skewness) <- unique(c(\"parameters_skewness\", class(.skewness)))\n  .skewness\n}\n\n\n#' @export\nskewness.data.frame <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  .skewness <- lapply(\n    x,\n    skewness,\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n\n  .skewness <- cbind(Parameter = names(.skewness), do.call(rbind, .skewness))\n\n  class(.skewness) <- unique(c(\"parameters_skewness\", class(.skewness)))\n  .skewness\n}\n\n\n#' 
@export\nskewness.default <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  skewness(\n    .factor_to_numeric(x),\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n}\n\n\n# Kurtosis -----------------------------------\n\n#' @rdname skewness\n#' @export\nkurtosis <- function(x, ...) {\n  UseMethod(\"kurtosis\")\n}\n\n\n#' @rdname skewness\n#' @export\nkurtosis.numeric <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  verbose = TRUE,\n  ...\n) {\n  if (remove_na) {\n    x <- x[!is.na(x)]\n  }\n  n <- length(x)\n  out <- n * sum((x - mean(x))^4) / (sum((x - mean(x))^2)^2)\n\n  type <- .check_skewness_type(type)\n\n  if (type == \"2\" && n < 4) {\n    if (verbose) {\n      insight::format_warning(\n        \"Need at least 4 complete observations for type-2-kurtosis. Using 'type=\\\"1\\\"' now.\"\n      )\n    }\n    type <- \"1\"\n  }\n\n  .kurtosis <- switch(\n    type,\n    \"1\" = out - 3,\n    \"2\" = ((n + 1) * (out - 3) + 6) * (n - 1) / ((n - 2) * (n - 3)),\n    \"3\" = out * (1 - 1 / n)^2 - 3\n  )\n\n  out_se <- sqrt(\n    (24 * n * (n - 2) * (n - 3)) / (((n + 1)^2) * (n + 3) * (n + 5))\n  )\n\n  .kurtosis_se <- switch(\n    type,\n    \"1\" = out_se,\n    \"2\" = out_se * (((n - 1) * (n + 1)) / ((n - 2) * (n - 3))),\n    \"3\" = out_se * ((n - 1) / n)^2\n  )\n\n  if (!is.null(iterations)) {\n    insight::check_if_installed(\"boot\")\n\n    results <- boot::boot(\n      data = x,\n      statistic = .boot_kurtosis,\n      R = iterations,\n      remove_na = remove_na,\n      type = type\n    )\n    out_se <- stats::sd(results$t, na.rm = TRUE)\n  }\n\n  .kurtosis <- data.frame(\n    Kurtosis = .kurtosis,\n    SE = out_se\n  )\n  class(.kurtosis) <- unique(c(\"parameters_kurtosis\", class(.kurtosis)))\n  .kurtosis\n}\n\n\n#' @export\nkurtosis.matrix <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  .kurtosis <- 
apply(\n    x,\n    2,\n    kurtosis,\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n  .names <- colnames(x)\n  if (length(.names) == 0) {\n    .names <- paste0(\"X\", seq_len(ncol(x)))\n  }\n  .kurtosis <- cbind(Parameter = .names, do.call(rbind, .kurtosis))\n  class(.kurtosis) <- unique(c(\"parameters_kurtosis\", class(.kurtosis)))\n  .kurtosis\n}\n\n\n#' @export\nkurtosis.data.frame <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  .kurtosis <- lapply(\n    x,\n    kurtosis,\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n  .kurtosis <- cbind(Parameter = names(.kurtosis), do.call(rbind, .kurtosis))\n  class(.kurtosis) <- unique(c(\"parameters_kurtosis\", class(.kurtosis)))\n  .kurtosis\n}\n\n\n#' @export\nkurtosis.default <- function(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  ...\n) {\n  kurtosis(\n    .factor_to_numeric(x),\n    remove_na = remove_na,\n    type = type,\n    iterations = iterations\n  )\n}\n\n\n# methods -----------------------------------------\n\n#' @export\nas.numeric.parameters_kurtosis <- function(x, ...) {\n  x$Kurtosis\n}\n\n#' @export\nas.numeric.parameters_skewness <- function(x, ...) {\n  x$Skewness\n}\n\n#' @export\nas.double.parameters_kurtosis <- as.numeric.parameters_kurtosis\n\n#' @export\nas.double.parameters_skewness <- as.numeric.parameters_skewness\n\n#' @rdname skewness\n#' @export\nprint.parameters_kurtosis <- function(x, digits = 3, test = FALSE, ...) {\n  out <- summary(x, test = test)\n  cat(insight::export_table(out, digits = digits))\n  invisible(x)\n}\n\n#' @rdname skewness\n#' @export\nprint.parameters_skewness <- print.parameters_kurtosis\n\n#' @rdname skewness\n#' @export\nsummary.parameters_skewness <- function(object, test = FALSE, ...) 
{\n  if (test) {\n    object$z <- object$Skewness / object$SE\n    object$p <- 2 * (1 - stats::pnorm(abs(object$z)))\n  }\n  object\n}\n\n#' @rdname skewness\n#' @export\nsummary.parameters_kurtosis <- function(object, test = FALSE, ...) {\n  if (test) {\n    object$z <- object$Kurtosis / object$SE\n    object$p <- 2 * (1 - stats::pnorm(abs(object$z)))\n  }\n  object\n}\n\n# helper ------------------------------------------\n\n.check_skewness_type <- function(type) {\n  # convenience\n  if (is.numeric(type)) {\n    type <- as.character(type)\n  }\n  skewness_types <- c(\n    \"1\",\n    \"2\",\n    \"3\",\n    \"I\",\n    \"II\",\n    \"III\",\n    \"classic\",\n    \"SPSS\",\n    \"SAS\",\n    \"Minitab\"\n  )\n  is_skewness_type_invalid <- is.null(type) ||\n    is.na(type) ||\n    !(type %in% skewness_types)\n\n  if (is_skewness_type_invalid) {\n    insight::format_warning(\n      \"'type' must be a character value from \\\"1\\\" to \\\"3\\\". Using 'type=\\\"2\\\"' now.\"\n    )\n    type <- \"2\"\n  }\n\n  switch(\n    type,\n    `1` = ,\n    I = ,\n    classic = \"1\",\n    `2` = ,\n    II = ,\n    SPSS = ,\n    SAS = \"2\",\n    `3` = ,\n    III = ,\n    Minitab = \"3\"\n  )\n}\n\n\n# bootstrapping -----------------------------------\n\n.boot_skewness <- function(data, indices, remove_na, type) {\n  datawizard::skewness(\n    data[indices],\n    remove_na = remove_na,\n    type = type,\n    iterations = NULL\n  )$Skewness\n}\n\n\n.boot_kurtosis <- function(data, indices, remove_na, type) {\n  datawizard::kurtosis(\n    data[indices],\n    remove_na = remove_na,\n    type = type,\n    iterations = NULL\n  )$Kurtosis\n}\n"
  },
  {
    "path": "R/slide.R",
    "content": "#' @title Shift numeric value range\n#' @name slide\n#'\n#' @description\n#' This functions shifts the value range of a numeric variable, so that the\n#' new range starts at a given value.\n#'\n#' @param x A data frame or numeric vector.\n#' @param verbose Toggle warnings.\n#' @param ... not used.\n#' @inheritParams to_numeric\n#'\n#' @return `x`, where the range of numeric variables starts at a new value.\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @inherit data_rename seealso\n#'\n#' @examples\n#' # numeric\n#' head(mtcars$gear)\n#' head(slide(mtcars$gear))\n#' head(slide(mtcars$gear, lowest = 10))\n#'\n#' # data frame\n#' sapply(slide(mtcars, lowest = 1), min)\n#' sapply(mtcars, min)\n#' @export\nslide <- function(x, ...) {\n  UseMethod(\"slide\")\n}\n\n\n#' @export\nslide.default <- function(x, lowest = 0, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      \"Shifting non-numeric variables is not possible.\",\n      \"Try using 'to_numeric()' and specify the 'lowest' argument.\"\n    )\n  }\n  x\n}\n\n\n#' @rdname slide\n#' @export\nslide.numeric <- function(x, lowest = 0, ...) 
{\n  original_x <- x\n  minval <- min(x, na.rm = TRUE)\n  difference <- minval - lowest\n  x <- x - difference\n  .set_back_labels(x, original_x, include_values = FALSE)\n}\n\n\n#' @rdname slide\n#' @export\nslide.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  lowest = 0,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_s\",\n      keep_factors = FALSE\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x[select] <- lapply(\n    x[select],\n    slide,\n    lowest = lowest,\n    verbose = verbose,\n    ...\n  )\n\n  x\n}\n"
  },
  {
    "path": "R/smoothness.R",
    "content": "#' Quantify the smoothness of a vector\n#'\n#' @param x Numeric vector (similar to a time series).\n#' @param method Can be `\"diff\"` (the standard deviation of the standardized\n#'   differences) or `\"cor\"` (default, lag-one autocorrelation).\n#' @param lag An integer indicating which lag to use. If less than `1`, will be\n#'   interpreted as expressed in percentage of the length of the vector.\n#' @inheritParams skewness\n#'\n#' @examples\n#' x <- (-10:10)^3 + rnorm(21, 0, 100)\n#' plot(x)\n#' smoothness(x, method = \"cor\")\n#' smoothness(x, method = \"diff\")\n#' @return Value of smoothness.\n#' @references https://stats.stackexchange.com/questions/24607/how-to-measure-smoothness-of-a-time-series-in-r\n#'\n#' @export\nsmoothness <- function(x, method = \"cor\", lag = 1, iterations = NULL, ...) {\n  UseMethod(\"smoothness\")\n}\n\n\n#' @export\nsmoothness.numeric <- function(\n  x,\n  method = \"cor\",\n  lag = 1,\n  iterations = NULL,\n  ...\n) {\n  if (lag < 1) {\n    lag <- round(lag * length(x))\n  }\n  if (lag <= 0) {\n    insight::format_error(\"'lag' cannot be that small.\")\n  }\n\n  if (method == \"cor\") {\n    smooth_data <- stats::cor(\n      utils::head(x, length(x) - lag),\n      utils::tail(x, length(x) - lag)\n    )\n  } else {\n    smooth_data <- stats::sd(diff(x, lag = lag)) / abs(mean(diff(x, lag = lag)))\n  }\n\n  if (!is.null(iterations)) {\n    if (requireNamespace(\"boot\", quietly = TRUE)) {\n      results <- boot::boot(\n        data = x,\n        statistic = .boot_smoothness,\n        R = iterations,\n        method = method,\n        lag = lag\n      )\n      out_se <- stats::sd(results$t, na.rm = TRUE)\n      smooth_data <- data.frame(Smoothness = smooth_data, SE = out_se)\n    } else {\n      insight::format_warning(\"Package 'boot' needed for bootstrapping SEs.\")\n    }\n  }\n\n  class(smooth_data) <- unique(c(\"parameters_smoothness\", class(smooth_data)))\n  smooth_data\n}\n\n\n#' @export\nsmoothness.data.frame 
<- function(\n  x,\n  method = \"cor\",\n  lag = 1,\n  iterations = NULL,\n  ...\n) {\n  .smoothness <-\n    lapply(\n      x,\n      smoothness,\n      method = method,\n      lag = lag,\n      iterations = iterations\n    )\n  .smoothness <- cbind(\n    Parameter = names(.smoothness),\n    do.call(rbind, .smoothness)\n  )\n  class(.smoothness) <- unique(c(\"parameters_smoothness\", class(.smoothness)))\n  .smoothness\n}\n\n\n#' @export\nsmoothness.default <- function(\n  x,\n  method = \"cor\",\n  lag = 1,\n  iterations = NULL,\n  ...\n) {\n  smoothness(\n    .factor_to_numeric(x),\n    method = method,\n    lag = lag,\n    iterations = iterations\n  )\n}\n\n\n# bootstrapping -----------------------------------\n\n.boot_smoothness <- function(data, indices, method, lag) {\n  datawizard::smoothness(\n    x = data[indices],\n    method = method,\n    lag = lag,\n    iterations = NULL\n  )\n}\n\n\n# methods -----------------------------------------\n\n#' @export\nas.numeric.parameters_smoothness <- function(x, ...) {\n  if (is.data.frame(x)) {\n    x$Smoothness\n  } else {\n    as.vector(x)\n  }\n}\n\n#' @export\nas.double.parameters_smoothness <- as.numeric.parameters_smoothness\n"
  },
  {
    "path": "R/standardize.R",
    "content": "#' Standardization (Z-scoring)\n#'\n#' Performs a standardization of data (z-scoring), i.e., centering and scaling,\n#' so that the data is expressed in terms of standard deviation (i.e., mean = 0,\n#' SD = 1) or Median Absolute Deviance (median = 0, MAD = 1). When applied to a\n#' statistical model, this function extracts the dataset, standardizes it, and\n#' refits the model with this standardized version of the dataset. The\n#' [normalize()] function can also be used to scale all numeric variables within\n#' the 0 - 1 range.\n#' \\cr\\cr\n#' For model standardization, see [`standardize.default()`].\n#'\n#' @param x A (grouped) data frame, a vector or a statistical model (for\n#'   `unstandardize()` cannot be a model).\n#' @param robust Logical, if `TRUE`, centering is done by subtracting the\n#'   median from the variables and dividing it by the median absolute deviation\n#'   (MAD). If `FALSE`, variables are standardized by subtracting the\n#'   mean and dividing it by the standard deviation (SD).\n#' @param two_sd If `TRUE`, the variables are scaled by two times the deviation\n#'   (SD or MAD depending on `robust`). This method can be useful to obtain\n#'   model coefficients of continuous parameters comparable to coefficients\n#'   related to binary predictors, when applied to **the predictors** (not the\n#'   outcome) (Gelman, 2008).\n#' @param weights Can be `NULL` (for no weighting), or:\n#' - For model: if `TRUE` (default), a weighted-standardization is carried out.\n#' - For `data.frame`s: a numeric vector of weights, or a character of the\n#'   name of a column in the `data.frame` that contains the weights.\n#' - For numeric vectors: a numeric vector of weights.\n#' @param verbose Toggle warnings and messages on or off.\n#' @param remove_na How should missing values (`NA`) be treated: if `\"none\"`\n#'   (default): each column's standardization is done separately, ignoring\n#'   `NA`s. 
Else, rows with `NA` in the columns selected with `select` /\n#'   `exclude` (`\"selected\"`) or in all columns (`\"all\"`) are dropped before\n#'   standardization, and the resulting data frame does not include these cases.\n#' @param force Logical, if `TRUE`, forces standardization of factors and dates\n#'   as well. Factors are converted to numerical values, with the lowest level\n#'   being the value `1` (unless the factor has numeric levels, which are\n#'   converted to the corresponding numeric value).\n#' @param append Logical or string. If `TRUE`, standardized variables get new\n#'   column names (with the suffix `\"_z\"`) and are appended (column bind) to `x`,\n#'   thus returning both the original and the standardized variables. If `FALSE`,\n#'   original variables in `x` will be overwritten by their standardized versions.\n#'   If a character value, standardized variables are appended with new column\n#'   names (using the defined suffix) to the original data frame.\n#' @param reference A data frame or variable from which the centrality and\n#'   deviation will be computed instead of from the input variable. Useful for\n#'   standardizing a subset or new data according to another data frame.\n#' @param center,scale\n#' * For `standardize()`: \\cr\n#'   Numeric values, which can be used as alternative to `reference` to define\n#'   a reference centrality and deviation. If `scale` and `center` are of\n#'   length 1, they will be recycled to match the length of selected variables\n#'   for standardization. Else, `center` and `scale` must be of same length as\n#'   the number of selected variables. Values in `center` and `scale` will be\n#'   matched to selected variables in the provided order, unless a named vector\n#'   is given. 
In this case, names are matched against the names of the selected\n#'   variables.\n#'\n#' * For `unstandardize()`: \\cr\n#'   `center` and `scale` correspond to the center (the mean / median) and the scale (SD / MAD) of\n#'   the original non-standardized data (for data frames, should be named, or\n#'   have column order correspond to the numeric column). However, one can also\n#'   directly provide the original data through `reference`, from which the\n#'   center and the scale will be computed (according to `robust` and `two_sd`).\n#'   Alternatively, if the input contains the attributes `center` and `scale`\n#'   (as does the output of `standardize()`), it will take it from there if the\n#'   rest of the arguments are absent.\n#' @param force Logical, if `TRUE`, forces recoding of factors and character\n#'   vectors as well.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams extract_column_names\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @return The standardized object (either a standardize data frame or a\n#'   statistical model fitted on standardized data).\n#'\n#' @note When `x` is a vector or a data frame with `remove_na = \"none\")`,\n#'   missing values are preserved, so the return value has the same length /\n#'   number of rows as the original input.\n#'\n#' @seealso See [center()] for grand-mean centering of variables, and\n#'   [makepredictcall.dw_transformer()] for use in model formulas.\n#'\n#' @family transform utilities\n#' @family standardize\n#'\n#' @examples\n#' d <- iris[1:4, ]\n#'\n#' # vectors\n#' standardise(d$Petal.Length)\n#'\n#' # Data frames\n#' # overwrite\n#' standardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"))\n#'\n#' # append\n#' standardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = TRUE)\n#'\n#' # append, suffix\n#' standardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = \"_std\")\n#'\n#' # standardizing with reference 
center and scale\n#' d <- data.frame(\n#'   a = c(-2, -1, 0, 1, 2),\n#'   b = c(3, 4, 5, 6, 7)\n#' )\n#'\n#' # default standardization, based on mean and sd of each variable\n#' standardize(d) # means are 0 and 5, sd ~ 1.581139\n#'\n#' # standardization, based on mean and sd set to the same values\n#' standardize(d, center = c(0, 5), scale = c(1.581, 1.581))\n#'\n#' # standardization, mean and sd for each variable newly defined\n#' standardize(d, center = c(3, 4), scale = c(2, 4))\n#'\n#' # standardization, taking same mean and sd for each variable\n#' standardize(d, center = 1, scale = 3)\n#' @export\nstandardize <- function(x, ...) {\n  UseMethod(\"standardize\")\n}\n\n#' @rdname standardize\n#' @export\nstandardise <- standardize\n\n\n# Default method is in effectsize\n\n# standardize.default <- function(x, verbose = TRUE, ...) {\n#   if (isTRUE(verbose)) {\n#     insight::format_alert(sprintf(\"Standardizing currently not possible for variables of class '%s'.\", class(x)[1])))\n#   }\n#   x\n# }\n\n#' @rdname standardize\n#' @export\nstandardize.numeric <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  scale = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # set default - need to fix this, else we don't know whether this\n  # comes from \"center()\" or \"standardize()\". Furthermore, data.frame\n  # methods cannot return a vector of NULLs for each variable - instead\n  # they return NA. 
Thus, we have to treat NA like NULL\n  if (is.null(scale) || is.na(scale)) {\n    scale <- TRUE\n  }\n  if (is.null(center) || is.na(center)) {\n    center <- TRUE\n  }\n\n  my_args <- .process_std_center(\n    x,\n    weights,\n    robust,\n    verbose,\n    reference,\n    center,\n    scale\n  )\n  dot_args <- list(...)\n\n  # Perform standardization\n  if (is.null(my_args)) {\n    # all NA?\n    return(x)\n  } else if (is.null(my_args$check)) {\n    vals <- rep(0, length(my_args$vals)) # If only unique value\n  } else if (two_sd) {\n    vals <- as.vector((my_args$vals - my_args$center) / (2 * my_args$scale))\n  } else {\n    vals <- as.vector((my_args$vals - my_args$center) / my_args$scale)\n  }\n\n  scaled_x <- rep(NA, length(my_args$valid_x))\n  scaled_x[my_args$valid_x] <- vals\n  attr(scaled_x, \"center\") <- my_args$center\n  attr(scaled_x, \"scale\") <- my_args$scale\n  attr(scaled_x, \"robust\") <- robust\n  # labels\n  z <- .set_back_labels(scaled_x, x, include_values = FALSE)\n  if (!isFALSE(dot_args$add_transform_class)) {\n    class(z) <- c(\"dw_transformer\", class(z))\n  }\n  z\n}\n\n#' @export\nstandardize.double <- standardize.numeric\n\n#' @export\nstandardize.integer <- standardize.numeric\n\n#' @export\nstandardize.matrix <- function(x, ...) 
{\n  xl <- lapply(seq_len(ncol(x)), function(i) x[, i])\n\n  xz <- lapply(xl, datawizard::standardize, ...)\n\n  x_out <- do.call(cbind, xz)\n  dimnames(x_out) <- dimnames(x)\n\n  attr(x_out, \"center\") <- vapply(xz, attr, \"center\", FUN.VALUE = numeric(1L))\n  attr(x_out, \"scale\") <- vapply(xz, attr, \"scale\", FUN.VALUE = numeric(1L))\n  attr(x_out, \"robust\") <- vapply(xz, attr, \"robust\", FUN.VALUE = logical(1L))[\n    1\n  ]\n  class(x_out) <- c(\"dw_transformer\", class(x_out))\n\n  x_out\n}\n\n\n#' @rdname standardize\n#' @export\nstandardize.factor <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  force = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  if (!force) {\n    return(x)\n  }\n\n  standardize(\n    .factor_to_numeric(x),\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    ...\n  )\n}\n\n\n#' @export\nstandardize.character <- standardize.factor\n\n#' @export\nstandardize.logical <- standardize.factor\n\n#' @export\nstandardize.Date <- standardize.factor\n\n#' @export\nstandardize.AsIs <- standardize.numeric\n\n\n# Data frames -------------------------------------------------------------\n\n#' @rdname standardize\n#' @export\nstandardize.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  scale = NULL,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  force = FALSE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # process arguments\n  my_args <- .process_std_args(\n    x,\n    select,\n    exclude,\n    weights,\n    append,\n    append_suffix = \"_z\",\n    keep_factors = force,\n    remove_na,\n    reference,\n    .center = center,\n   
 .scale = scale\n  )\n\n  # set new values\n  x <- my_args$x\n\n  # Loop through variables and standardize it\n  for (var in my_args$select) {\n    x[[var]] <- standardize(\n      x[[var]],\n      robust = robust,\n      two_sd = two_sd,\n      weights = my_args$weights,\n      reference = reference[[var]],\n      center = my_args$center[var],\n      scale = my_args$scale[var],\n      verbose = FALSE,\n      force = force,\n      add_transform_class = FALSE\n    )\n  }\n\n  attr(x, \"center\") <- unlist(lapply(x[my_args$select], function(z) {\n    attributes(z)$center\n  }))\n  attr(x, \"scale\") <- unlist(lapply(x[my_args$select], function(z) {\n    attributes(z)$scale\n  }))\n  attr(x, \"robust\") <- robust\n  x\n}\n\n\n#' @export\nstandardize.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  scale = NULL,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  force = FALSE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  my_args <- .process_grouped_df(\n    x,\n    select,\n    exclude,\n    append,\n    append_suffix = \"_z\",\n    reference,\n    weights,\n    keep_factors = force\n  )\n\n  # create column(s) to store dw_transformer attributes\n  for (i in select) {\n    my_args$info$groups[[paste0(\"attr_\", i)]] <- rep(NA, length(my_args$grps))\n  }\n\n  for (rows in seq_along(my_args$grps)) {\n    tmp <- standardize(\n      my_args$x[my_args$grps[[rows]], , drop = FALSE],\n      select = my_args$select,\n      exclude = NULL,\n      robust = robust,\n      two_sd = two_sd,\n      weights = my_args$weights,\n      remove_na = remove_na,\n      verbose = verbose,\n      force = force,\n      append = FALSE,\n      center = 
center,\n      scale = scale,\n      add_transform_class = FALSE,\n      ...\n    )\n\n    # store dw_transformer_attributes\n    for (i in select) {\n      my_args$info$groups[rows, paste0(\"attr_\", i)][[\n        1\n      ]] <- list(unlist(attributes(tmp[[i]])))\n    }\n\n    my_args$x[my_args$grps[[rows]], ] <- tmp\n  }\n\n  # last column of \"groups\" attributes must be called \".rows\"\n  my_args$info$groups <- data_relocate(my_args$info$groups, \".rows\", after = -1)\n\n  # set back class, so data frame still works with dplyr\n  attributes(my_args$x) <- my_args$info\n  my_args$x\n}\n\n\n# Datagrid ----------------------------------------------------------------\n\n#' @export\nstandardize.datagrid <- function(x, ...) {\n  x[names(x)] <- standardize(\n    as.data.frame(x),\n    reference = attributes(x)$data,\n    ...\n  )\n  x\n}\n\n#' @export\nstandardize.visualisation_matrix <- standardize.datagrid\n"
  },
  {
    "path": "R/standardize.models.R",
    "content": "#' Re-fit a model with standardized data\n#'\n#' Performs a standardization of data (z-scoring) using\n#' [`standardize()`] and then re-fits the model to the standardized data.\n#' \\cr\\cr\n#' Standardization is done by completely refitting the model on the standardized\n#' data. Hence, this approach is equal to standardizing the variables *before*\n#' fitting the model and will return a new model object. This method is\n#' particularly recommended for complex models that include interactions or\n#' transformations (e.g., polynomial or spline terms). The `robust` (default to\n#' `FALSE`) argument enables a robust standardization of data, based on the\n#' `median` and the `MAD` instead of the `mean` and the `SD`.\n#'\n#' @param x A statistical model.\n#' @param weights If `TRUE` (default), a weighted-standardization is carried out.\n#' @param include_response If `TRUE` (default), the response value will also be\n#'   standardized. If `FALSE`, only the predictors will be standardized.\n#'   - Note that for GLMs and models with non-linear link functions, the\n#'   response value will not be standardized, to make re-fitting the model work.\n#'   - If the model contains an [stats::offset()], the offset variable(s) will\n#'   be standardized only if the response is standardized. 
If `two_sd = TRUE`,\n#'   offsets are standardized by one-sd (similar to the response).\n#'   - (For `mediate` models, the `include_response` refers to the outcome in\n#'   the y model; m model's response will always be standardized when possible).\n#' @inheritParams standardize\n#'\n#' @return A statistical model fitted on standardized data\n#'\n#' @details\n#'\n#' # Generalized Linear Models\n#' Standardization for generalized linear models (GLM, GLMM, etc) is done only\n#' with respect to the predictors (while the outcome remains as-is,\n#' unstandardized) - maintaining the interpretability of the coefficients (e.g.,\n#' in a binomial model: the exponent of the standardized parameter is the OR of\n#' a change of 1 SD in the predictor, etc.)\n#'\n#' # Dealing with Factors\n#' `standardize(model)` or `standardize_parameters(model, method = \"refit\")` do\n#' *not* standardize categorical predictors (i.e. factors) / their\n#' dummy-variables, which may be a different behaviour compared to other R\n#' packages (such as **lm.beta**) or other software packages (like SPSS). To\n#' mimic such behaviours, either use `standardize_parameters(model, method =\n#' \"basic\")` to obtain post-hoc standardized parameters, or standardize the data\n#' with `standardize(data, force = TRUE)` *before* fitting the\n#' model.\n#'\n#' # Transformed Variables\n#' When the model's formula contains transformations (e.g. `y ~ exp(X)`) the\n#' transformation effectively takes place after standardization (e.g.,\n#' `exp(scale(X))`). 
Since some transformations are undefined for none positive\n#' values, such as `log()` and `sqrt()`, the relevel variables are shifted (post\n#' standardization) by `Z - min(Z) + 1` or `Z - min(Z)` (respectively).\n#'\n#'\n#' @family standardize\n#' @examples\n#' model <- lm(Infant.Mortality ~ Education * Fertility, data = swiss)\n#' coef(standardize(model))\n#'\n#' @export\n#' @aliases standardize_models\nstandardize.default <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n) {\n  if (!insight::is_model(x)) {\n    insight::format_warning(\n      paste0(\n        \"Objects or variables of class '\",\n        class(x)[1],\n        \"' cannot be standardized.\"\n      )\n    )\n    return(x)\n  }\n\n  data_std <- NULL # needed to avoid note\n  .standardize_models(\n    x,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = include_response,\n    update_expr = stats::update(x, data = data_std),\n    ...\n  )\n}\n\n\n.standardize_models <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  update_expr,\n  ...\n) {\n  # check model formula. Some notations don't work when standardizing data\n  insight::formula_ok(\n    x,\n    action = \"error\",\n    prefix_msg = \"Model cannot be standardized.\",\n    verbose = verbose\n  )\n\n  m_info <- .get_model_info(x, ...)\n  model_data <- insight::get_data(x, source = \"mf\", verbose = FALSE)\n\n  if (isTRUE(attr(model_data, \"is_subset\"))) {\n    insight::format_error(\"Cannot standardize a model fit with a 'subset = '.\")\n  }\n\n  if (m_info$is_bayesian && verbose) {\n    insight::format_warning(\n      \"Standardizing variables without adjusting priors may lead to bogus results unless priors are auto-scaled.\"\n    )\n  }\n\n  ## ---- Z the RESPONSE? ----\n  # 1. 
Some models have special responses that should not be standardized. This\n  # includes:\n  # - generalized linear models (counts, binomial, etc...)\n  # - Survival models\n  # 2. We also don't want to standardize the response when `two_sd = TRUE` -\n  # instead we will standardize the response separately.\n\n  include_response <- include_response && .safe_to_standardize_response(m_info)\n\n  resp <- NULL\n  if (!include_response || (include_response && two_sd)) {\n    resp <- c(\n      insight::find_response(x),\n      insight::find_response(x, combine = FALSE)\n    )\n    resp <- insight::clean_names(resp)\n    resp <- unique(resp)\n  }\n\n  # If there's an offset, don't standardize offset OR response\n  offsets <- insight::find_offset(x)\n  if (length(offsets)) {\n    if (include_response) {\n      if (verbose) {\n        insight::format_warning(\"Offset detected and will be standardized.\")\n      }\n\n      if (two_sd) {\n        # Treat offsets like responses - only standardize by 1 SD\n        resp <- c(resp, offsets)\n        offsets <- NULL\n      }\n    } else if (!include_response) {\n      # Don't standardize offsets if not standardizing the response\n      offsets <- NULL\n    }\n  }\n\n  ## ---- DO NOT Z: ----\n\n  # 1. WEIGHTS:\n  # because negative weights will cause errors in \"update()\"\n  weight_variable <- insight::find_weights(x)\n\n  if (\n    !is.null(weight_variable) &&\n      !weight_variable %in% colnames(model_data) &&\n      \"(weights)\" %in% colnames(model_data)\n  ) {\n    model_data$.missing_weight <- model_data[[\"(weights)\"]]\n    colnames(model_data)[ncol(model_data)] <- weight_variable\n    weight_variable <- c(weight_variable, \"(weights)\")\n  }\n\n  # 2. RANDOM-GROUPS:\n  random_group_factor <- insight::find_random(\n    x,\n    flatten = TRUE,\n    split_nested = TRUE\n  )\n\n  ## ---- SUMMARY: TO Z OR NOT TO Z? 
----\n\n  dont_standardize <- c(resp, weight_variable, random_group_factor)\n  do_standardize <- setdiff(colnames(model_data), dont_standardize)\n\n  # can't std data$var variables\n  doller_vars <- grepl(\"(.*)\\\\$(.*)\", do_standardize)\n  if (any(doller_vars)) {\n    doller_vars <- colnames(model_data)[doller_vars]\n    insight::format_warning(\n      \"Unable to standardize variables evaluated in the environment (i.e., not in `data`).\",\n      \"The following variables will not be standardized:\",\n      toString(doller_vars)\n    )\n    do_standardize <- setdiff(do_standardize, doller_vars)\n    dont_standardize <- c(dont_standardize, doller_vars)\n  }\n\n  if (!length(do_standardize)) {\n    insight::format_warning(\"No variables could be standardized.\")\n    return(x)\n  }\n\n  ## ---- STANDARDIZE! ----\n\n  w <- insight::get_weights(x, remove_na = TRUE)\n\n  data_std <- standardize(\n    model_data[do_standardize],\n    robust = robust,\n    two_sd = two_sd,\n    weights = if (weights) w,\n    verbose = verbose\n  )\n\n  # if two_sd, it must not affect the response!\n  if (include_response && two_sd) {\n    data_std[resp] <- standardize(\n      model_data[resp],\n      robust = robust,\n      two_sd = FALSE,\n      weights = if (weights) w,\n      verbose = verbose\n    )\n\n    dont_standardize <- setdiff(dont_standardize, resp)\n  }\n\n  # FIX LOG-SQRT VARS:\n  # if we standardize log-terms, standardization will fail (because log of\n  # negative value is NaN). 
Do some back-transformation here\n\n  log_terms <- .log_terms(x, data_std)\n  if (length(log_terms) > 0) {\n    data_std[log_terms] <- lapply(\n      data_std[log_terms],\n      function(i) i - min(i, na.rm = TRUE) + 1\n    )\n  }\n\n  # same for sqrt\n  sqrt_terms <- .sqrt_terms(x, data_std)\n  if (length(sqrt_terms) > 0) {\n    data_std[sqrt_terms] <- lapply(\n      data_std[sqrt_terms],\n      function(i) i - min(i, na.rm = TRUE)\n    )\n  }\n\n  if (verbose && length(c(log_terms, sqrt_terms))) {\n    insight::format_alert(\n      \"Formula contains log- or sqrt-terms.\",\n      \"See help(\\\"standardize\\\") for how such terms are standardized.\"\n    )\n  }\n\n  ## ---- ADD BACK VARS THAT WHERE NOT Z ----\n  if (length(dont_standardize)) {\n    remaining_columns <- intersect(colnames(model_data), dont_standardize)\n    data_std <- cbind(model_data[, remaining_columns, drop = FALSE], data_std)\n  }\n\n  ## ---- UPDATE MODEL WITH Z DATA ----\n  on.exit(.update_failed())\n\n  if (isTRUE(verbose)) {\n    model_std <- eval(substitute(update_expr))\n  } else {\n    utils::capture.output({\n      model_std <- eval(substitute(update_expr))\n    })\n  }\n\n  on.exit() # undo previous on.exit()\n\n  model_std\n}\n\n\n# Special methods ---------------------------------------------------------\n\n#' @export\nstandardize.brmsfit <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n) {\n  data_std <- NULL # needed to avoid note\n  if (insight::is_multivariate(x)) {\n    insight::format_error(\n      \"Multivariate brmsfit models not supported.\",\n      \"As an alternative: you may standardize your data (and adjust your priors), and re-fit the model.\"\n    )\n  }\n\n  .standardize_models(\n    x,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = include_response,\n    update_expr = stats::update(x, newdata = data_std),\n    ...\n  
)\n}\n\n\n#' @export\nstandardize.mixor <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n) {\n  data_std <- random_group_factor <- NULL # needed to avoid note\n  .standardize_models(\n    x,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = include_response,\n    update_expr = {\n      data_std <- data_std[\n        order(data_std[, random_group_factor, drop = FALSE]),\n      ]\n      stats::update(x, data = data_std)\n    },\n    ...\n  )\n}\n\n\n#' @export\nstandardize.mediate <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n) {\n  # models and data\n  y <- x$model.y\n  m <- x$model.m\n  y_data <- insight::get_data(y, source = \"mf\", verbose = FALSE)\n  m_data <- insight::get_data(m, source = \"mf\", verbose = FALSE)\n\n  # std models and data\n  y_std <- standardize(\n    y,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = include_response,\n    ...\n  )\n  m_std <- standardize(\n    m,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = TRUE,\n    ...\n  )\n  y_data_std <- insight::get_data(y_std, source = \"mf\", verbose = FALSE)\n  m_data_std <- insight::get_data(m_std, source = \"mf\", verbose = FALSE)\n\n  # fixed values\n  covs <- x$covariates\n  control.value <- x$control.value\n  treat.value <- x$treat.value\n\n  if (!is.null(covs)) {\n    covs <- mapply(\n      .rescale_fixed_values,\n      covs,\n      names(covs),\n      SIMPLIFY = FALSE,\n      MoreArgs = list(\n        y_data = y_data,\n        m_data = m_data,\n        y_data_std = y_data_std,\n        m_data_std = m_data_std\n      )\n    )\n    if (verbose) {\n      insight::format_alert(\n        \"Covariates' values have been rescaled to 
their standardized scales.\"\n      )\n    }\n  }\n\n  # if (is.numeric(y_data[[x$treat]]) || is.numeric(m_data[[x$treat]])) {\n  #   if (!(is.numeric(y_data[[x$treat]]) && is.numeric(m_data[[x$treat]]))) {\n  #     stop(\"'treat' variable is not of same type across both y and m models.\",\n  #          \"\\nCannot consistently standardize.\", call. = FALSE)\n  #   }\n  #\n  #   temp_vals <- .rescale_fixed_values(c(control.value, treat.value), x$treat,\n  #                                      y_data = y_data, m_data = m_data,\n  #                                      y_data_std = y_data_std, m_data_std = m_data_std)\n  #\n  #   control.value <- temp_vals[1]\n  #   treat.value <- temp_vals[2]\n  #   if (verbose) insight::format_alert(\"control and treatment values have been\n  #   rescaled to their standardized scales.\")\n  # }\n\n  if (verbose && !all(c(control.value, treat.value) %in% c(0, 1))) {\n    insight::format_warning(\n      \"Control and treat values are not 0 and 1, and have not been re-scaled.\",\n      \"Interpret results with caution.\"\n    )\n  }\n\n  junk <- utils::capture.output({\n    model_std <- stats::update(\n      x,\n      model.y = y_std,\n      model.m = m_std,\n      # control.value = control.value, treat.value = treat.value\n      covariates = covs\n    )\n  })\n\n  model_std\n}\n\n\n# Cannot ------------------------------------------------------------------\n\n#' @export\nstandardize.wbm <- function(x, ...) 
{\n  .update_failed(class(x))\n}\n\n#' @export\nstandardize.Surv <- standardize.wbm\n\n#' @export\nstandardize.clm2 <- standardize.wbm\n\n#' @export\nstandardize.bcplm <- standardize.wbm\n\n#' @export\nstandardize.wbgee <- standardize.wbm\n\n#' @export\nstandardize.biglm <- standardize.wbm\n# biglm doesn't regit the model to new data - it ADDs MORE data to the model.\n\n#' @export\n# Almost the same as `standardize.default()` but we pass `use_calling_env` in\n# update().\nstandardize.fixest <- function(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n) {\n  data_std <- NULL # needed to avoid note\n  .standardize_models(\n    x,\n    robust = robust,\n    two_sd = two_sd,\n    weights = weights,\n    verbose = verbose,\n    include_response = include_response,\n    update_expr = stats::update(x, data = data_std, use_calling_env = FALSE),\n    ...\n  )\n}\n\n# helper ----------------------------\n\n# Find log-terms inside model formula, and return \"clean\" term names\n.log_terms <- function(model, data) {\n  x <- insight::find_terms(model, flatten = TRUE)\n  # log_pattern <- \"^log\\\\((.*)\\\\)\"\n  log_pattern <- \"(log\\\\(log|log|log1|log10|log1p|log2)\\\\(([^,\\\\+)]*).*\"\n  out <- insight::trim_ws(gsub(\n    log_pattern,\n    \"\\\\2\",\n    grep(log_pattern, x, value = TRUE)\n  ))\n  intersect(colnames(data), out)\n}\n\n# Find log-terms inside model formula, and return \"clean\" term names\n.sqrt_terms <- function(model, data) {\n  x <- insight::find_terms(model, flatten = TRUE)\n  pattern <- \"sqrt\\\\(([^,\\\\+)]*).*\"\n  out <- insight::trim_ws(gsub(pattern, \"\\\\1\", grep(pattern, x, value = TRUE)))\n  intersect(colnames(data), out)\n}\n\n\n#' @keywords internal\n.safe_to_standardize_response <- function(info, verbose = TRUE) {\n  if (is.null(info)) {\n    if (verbose) {\n      insight::format_warning(\n        \"Unable to verify if response should not be standardized.\",\n        
\"Response will be standardized.\"\n      )\n    }\n    return(TRUE)\n  }\n\n  # check if model has a response variable that should not be standardized.\n  info$is_linear &&\n    info$family != \"inverse.gaussian\" &&\n    !info$is_survival &&\n    !info$is_censored\n\n  # # alternative would be to keep something like:\n  # !info$is_count &&\n  #   !info$is_ordinal &&\n  #   !info$is_multinomial &&\n  #   !info$is_beta &&\n  #   !info$is_censored &&\n  #   !info$is_binomial &&\n  #   !info$is_survival\n  # # And then treating response for \"Gamma()\" or \"inverse.gaussian\" similar to\n  # # log-terms...\n}\n\n#' @keywords internal\n.rescale_fixed_values <- function(\n  val,\n  cov_nm,\n  y_data,\n  m_data,\n  y_data_std,\n  m_data_std\n) {\n  if (cov_nm %in% colnames(y_data)) {\n    temp_data <- y_data\n    temp_data_std <- y_data_std\n  } else {\n    temp_data <- m_data\n    temp_data_std <- m_data_std\n  }\n\n  rescale(\n    val,\n    to = range(temp_data_std[[cov_nm]]),\n    range = range(temp_data[[cov_nm]])\n  )\n}\n\n\n#' @keywords internal\n.update_failed <- function(class = NULL, ...) {\n  if (is.null(class)) {\n    msg1 <- \"Unable to refit the model with standardized data.\"\n  } else {\n    msg1 <- sprintf(\n      \"Standardization of parameters not possible for models of class '%s'.\",\n      class\n    )\n  }\n\n  insight::format_error(\n    msg1,\n    \"Try instead to standardize the data (standardize(data)) and refit the model manually.\"\n  )\n}\n"
  },
  {
    "path": "R/text_format.R",
    "content": "#' Convenient text formatting functionalities\n#'\n#' Convenience functions to manipulate and format text.\n#'\n#' @param text,text2 A character string.\n#' @param width Positive integer giving the target column width for wrapping\n#' lines in the output. Can be \"auto\", in which case it will select 90\\% of the\n#' default width.\n#' @param pattern Regex pattern to remove from `text`.\n#' @param sep Separator.\n#' @param last Last separator.\n#' @param n The number of characters to find.\n#' @param enclose Character that will be used to wrap elements of `text`, so\n#'   these can be, e.g., enclosed with quotes or backticks. If `NULL` (default),\n#'   text elements will not be enclosed.\n#' @param ... Other arguments to be passed to or from other functions.\n#'\n#' @return A character string.\n#'\n#' @examples\n#' # Add full stop if missing\n#' text_fullstop(c(\"something\", \"something else.\"))\n#'\n#' # Find last characters\n#' text_lastchar(c(\"ABC\", \"DEF\"), n = 2)\n#'\n#' # Smart concatenation\n#' text_concatenate(c(\"First\", \"Second\", \"Last\"))\n#' text_concatenate(c(\"First\", \"Second\", \"Last\"), last = \" or \", enclose = \"`\")\n#'\n#' # Remove parts of string\n#' text_remove(c(\"one!\", \"two\", \"three!\"), \"!\")\n#'\n#' # Wrap text\n#' long_text <- paste(rep(\"abc \", 100), collapse = \"\")\n#' cat(text_wrap(long_text, width = 50))\n#'\n#' # Paste with optional separator\n#' text_paste(c(\"A\", \"\", \"B\"), c(\"42\", \"42\", \"42\"))\n#' @export\ntext_format <- function(\n  text,\n  sep = \", \",\n  last = \" and \",\n  width = NULL,\n  enclose = NULL,\n  ...\n) {\n  text_wrap(\n    text_concatenate(text, sep = sep, last = last, enclose = enclose),\n    width = width\n  )\n}\n\n#' @rdname text_format\n#' @export\ntext_fullstop <- function(text) {\n  text[!text_lastchar(text) %in% c(\".\", \":\", \",\", \";\", \"!\", \"?\")] <- paste0(\n    text[text_lastchar(text) != \".\"],\n    \".\"\n  )\n  text\n}\n\n\n#' @rdname 
text_format\n#' @export\ntext_lastchar <- function(text, n = 1) {\n  vapply(\n    text,\n    function(xx) {\n      substr(xx, (nchar(xx) - n + 1), nchar(xx))\n    },\n    FUN.VALUE = character(1L)\n  )\n}\n\n\n#' @rdname text_format\n#' @export\ntext_concatenate <- function(text, sep = \", \", last = \" and \", enclose = NULL) {\n  if (length(text) == 1 && !nzchar(text, keepNA = TRUE)) {\n    return(text)\n  }\n  text <- text[text != \"\"] # nolint\n  if (\n    length(text) &&\n      !is.null(enclose) &&\n      length(enclose) == 1 &&\n      nzchar(enclose, keepNA = TRUE)\n  ) {\n    text <- paste0(enclose, text, enclose)\n  }\n  if (length(text) == 1) {\n    s <- text\n  } else {\n    s <- paste(text[1:(length(text) - 1)], collapse = sep)\n    s <- paste(c(s, text[length(text)]), collapse = last)\n  }\n  s\n}\n\n\n#' @rdname text_format\n#' @export\ntext_paste <- function(text, text2 = NULL, sep = \", \", enclose = NULL, ...) {\n  if (!is.null(text2)) {\n    if (\n      !is.null(enclose) &&\n        length(enclose) == 1 &&\n        nzchar(enclose, keepNA = TRUE)\n    ) {\n      text <- vapply(\n        text,\n        function(i) {\n          if (i != \"\") {\n            i <- paste0(enclose, i, enclose)\n          }\n          i\n        },\n        character(1L)\n      )\n      text2 <- vapply(\n        text2,\n        function(i) {\n          if (i != \"\") {\n            i <- paste0(enclose, i, enclose)\n          }\n          i\n        },\n        character(1L)\n      )\n    }\n    paste0(text, ifelse(text == \"\" | text2 == \"\", \"\", sep), text2) # nolint\n  }\n}\n\n\n#' @rdname text_format\n#' @export\ntext_remove <- function(text, pattern = \"\", ...) {\n  gsub(pattern, \"\", text, ...)\n}\n\n\n#' @rdname text_format\n#' @export\ntext_wrap <- function(text, width = NULL, ...) 
{\n  width <- width %||% getOption(\"width\")\n\n  text <- strsplit(text, \"\\n\", fixed = TRUE)\n  text <- unlist(text, use.names = FALSE)\n\n  wrapped <- \"\"\n\n  for (s in text) {\n    if (nchar(s) > width) {\n      leading_spaces <- nchar(s) - nchar(insight::trim_ws(s))\n      s <- strwrap(s, width = width)\n      s <- paste(s, collapse = \"\\n\")\n      s <- paste0(strrep(\" \", leading_spaces), s)\n    }\n    wrapped <- paste0(wrapped, s, \"\\n\")\n  }\n\n  wrapped\n}\n"
  },
  {
    "path": "R/to_factor.R",
    "content": "#' @title Convert data to factors\n#' @name to_factor\n#'\n#' @details\n#' Convert variables or data into factors. If the data is labelled, value labels\n#' will be used as factor levels. The counterpart to convert variables into\n#' numeric is `to_numeric()`.\n#'\n#' @param x A data frame or vector.\n#' @param labels_to_levels Logical, if `TRUE`, value labels are used as factor\n#' levels after `x` was converted to factor. Else, factor levels are based on\n#' the values of `x` (i.e. as if using `as.factor()`).\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams extract_column_names\n#' @inheritParams categorize\n#'\n#' @inheritSection center Selection of variables - the `select` argument\n#'\n#' @return A factor, or a data frame of factors.\n#'\n#' @note Factors are ignored and returned as is. If you want to use value labels\n#' as levels for factors, use [`labels_to_levels()`] instead.\n#'\n#' @examples\n#' str(to_factor(iris))\n#'\n#' # use labels as levels\n#' data(efc)\n#' str(efc$c172code)\n#' head(to_factor(efc$c172code))\n#' @export\nto_factor <- function(x, ...) {\n  UseMethod(\"to_factor\")\n}\n\n\n#' @export\nto_factor.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Converting into factors values currently not possible for variables of class `%s`.\",\n        class(x)[1]\n      )\n    )\n  }\n  x\n}\n\n#' @export\nto_factor.factor <- function(x, ...) {\n  x\n}\n\n#' @rdname to_factor\n#' @export\nto_factor.numeric <- function(x, labels_to_levels = TRUE, verbose = TRUE, ...) 
{\n  # preserve labels\n  variable_label <- attr(x, \"label\", exact = TRUE)\n  value_labels <- attr(x, \"labels\", exact = TRUE)\n\n  # to factor\n  x <- as.factor(x)\n\n  # add back labels\n  attr(x, \"label\") <- variable_label\n  attr(x, \"labels\") <- value_labels\n\n  # value labels to factor levels\n  if (labels_to_levels) {\n    x <- .value_labels_to_levels(x, verbose = verbose, ...)\n  }\n  x\n}\n\n#' @export\nto_factor.logical <- to_factor.numeric\n\n#' @export\nto_factor.character <- to_factor.numeric\n\n#' @export\nto_factor.Date <- to_factor.numeric\n\n#' @export\nto_factor.haven_labelled <- to_factor.numeric\n\n#' @export\nto_factor.double <- to_factor.numeric\n\n#' @rdname to_factor\n#' @export\nto_factor.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  append = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # validation check, return as is for complete factor\n  if (all(vapply(x, is.factor, FUN.VALUE = logical(1L)))) {\n    return(x)\n  }\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # drop factors, when append is not FALSE\n    select <- colnames(x[select])[\n      !vapply(x[select], is.factor, FUN.VALUE = logical(1L))\n    ]\n    # process arguments\n    my_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_f\",\n      keep_factors = FALSE,\n      keep_character = TRUE,\n      preserve_value_labels = TRUE\n    )\n    # update processed arguments\n    x <- my_args$x\n    select <- my_args$select\n  }\n\n  x[select] <- lapply(x[select], to_factor, verbose = verbose, ...)\n  x\n}\n"
  },
  {
    "path": "R/to_numeric.R",
    "content": "#' Convert data to numeric\n#'\n#' Convert data to numeric by converting characters to factors and factors to\n#' either numeric levels or dummy variables. The \"counterpart\" to convert\n#' variables into factors is `to_factor()`.\n#'\n#' @param x A data frame, factor or vector.\n#' @param dummy_factors Transform factors to dummy factors (all factor levels as\n#'   different columns filled with a binary 0-1 value).\n#' @param preserve_levels Logical, only applies if `x` is a factor. If `TRUE`,\n#' and `x` has numeric factor levels, these will be converted into the related\n#' numeric values. If this is not possible, the converted numeric values will\n#' start from 1 to number of levels.\n#' @param lowest Numeric, indicating the lowest (minimum) value when converting\n#' factors or character vectors to numeric values.\n#' @param ... Arguments passed to or from other methods.\n#' @inheritParams extract_column_names\n#' @inheritParams categorize\n#'\n#' @note When factors should be converted into multiple \"binary\" dummies, i.e.\n#' each factor level is converted into a separate column filled with a binary\n#' 0-1 value, set `dummy_factors = TRUE`. If you want to preserve the original\n#' factor levels (in case these represent numeric values), use\n#' `preserve_levels = TRUE`.\n#'\n#' @section Selection of variables - `select` argument:\n#' For most functions that have a `select` argument the complete input data\n#' frame is returned, even when `select` only selects a range of variables.\n#' However, for `to_numeric()`, factors might be converted into dummies,\n#' thus, the number of variables of the returned data frame no longer match\n#' the input data frame. Hence, when `select` is used, *only* those variables\n#' (or their dummies) specified in `select` will be returned. 
Use `append=TRUE`\n#' to also include the original variables in the returned data frame.\n#'\n#' @examples\n#' to_numeric(head(ToothGrowth))\n#' to_numeric(head(ToothGrowth), dummy_factors = TRUE)\n#'\n#' # factors\n#' x <- as.factor(mtcars$gear)\n#' to_numeric(x)\n#' to_numeric(x, preserve_levels = TRUE)\n#' # same as:\n#' coerce_to_numeric(x)\n#'\n#' @return A data frame of numeric variables.\n#'\n#' @export\nto_numeric <- function(x, ...) {\n  UseMethod(\"to_numeric\")\n}\n\n#' @export\nto_numeric.default <- function(x, verbose = TRUE, ...) {\n  if (isTRUE(verbose)) {\n    insight::format_alert(\n      sprintf(\n        \"Converting into numeric values currently not possible for variables of class '%s'.\",\n        class(x)[1]\n      )\n    )\n  }\n  x\n}\n\n\n#' @rdname to_numeric\n#' @export\nto_numeric.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  dummy_factors = FALSE,\n  preserve_levels = FALSE,\n  lowest = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # validation check, return as is for complete numeric\n  if (all(vapply(x, is.numeric, FUN.VALUE = logical(1L)))) {\n    return(x)\n  }\n\n  df_attr <- attributes(x)\n\n  # evaluate arguments\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # when we append variables, we call \".process_append()\", which will\n  # create the new variables and updates \"select\", so new variables are processed\n  if (!isFALSE(append)) {\n    # drop numerics, when append is not FALSE\n    select <- colnames(x[select])[\n      !vapply(x[select], is.numeric, FUN.VALUE = logical(1L))\n    ]\n    # process arguments\n    fun_args <- .process_append(\n      x,\n      select,\n      append,\n      append_suffix = \"_n\",\n      keep_factors = TRUE\n    )\n    # update processed arguments\n    x <- fun_args$x\n    select <- fun_args$select\n  }\n\n  out <- sapply(\n    
x[select],\n    to_numeric,\n    dummy_factors = dummy_factors,\n    preserve_levels = preserve_levels,\n    lowest = lowest,\n    verbose = verbose,\n    simplify = FALSE\n  )\n\n  # save variable attributes\n  attr_vars <- lapply(out, attributes)\n  # \"out\" is currently a list, bind columns and to data frame\n  out <- as.data.frame(do.call(cbind, out))\n  # set back attributes\n  for (i in colnames(out)) {\n    if (is.list(attr_vars[[i]])) {\n      if (is.list(attributes(out[[i]]))) {\n        attributes(out[[i]]) <- utils::modifyList(\n          attr_vars[[i]],\n          attributes(out[[i]])\n        )\n      } else {\n        attributes(out[[i]]) <- attr_vars[[i]]\n      }\n    }\n  }\n\n  # due to the special handling of dummy factors, we need to take care\n  # of appending the data here again. usually, \"fun_args$x\" includes the appended\n  # data, which does not work here...\n\n  if (!isFALSE(append)) {\n    common_columns <- intersect(colnames(x), colnames(out))\n    if (length(common_columns)) {\n      x[common_columns] <- NULL\n    }\n    out <- cbind(x, out)\n  }\n\n  # add back custom attributes\n  out <- .replace_attrs(out, df_attr)\n  out\n}\n\n\n#' @export\nto_numeric.numeric <- function(x, verbose = TRUE, ...) {\n  .set_back_labels(as.numeric(x), x, reverse_values = FALSE)\n}\n\n#' @export\nto_numeric.double <- to_numeric.numeric\n\n#' @export\nto_numeric.logical <- to_numeric.numeric\n\n#' @export\nto_numeric.haven_labelled <- to_numeric.numeric\n\n#' @export\nto_numeric.Date <- function(x, verbose = TRUE, ...) 
{\n  if (verbose) {\n    insight::format_warning(\n      paste0(\n        \"Converting a date-time variable of class `\",\n        class(x)[1],\n        \"` into numeric.\"\n      ),\n      \"Please note that this conversion probably does not return meaningful results.\"\n    )\n  }\n  as.numeric(x)\n}\n\n#' @export\nto_numeric.POSIXt <- to_numeric.Date\n\n#' @export\nto_numeric.POSIXct <- to_numeric.Date\n\n#' @export\nto_numeric.POSIXlt <- to_numeric.Date\n\n\n#' @export\nto_numeric.factor <- function(\n  x,\n  dummy_factors = FALSE,\n  preserve_levels = FALSE,\n  lowest = NULL,\n  verbose = TRUE,\n  ...\n) {\n  # preserving levels only works when factor levels are numeric\n  if (\n    isTRUE(preserve_levels) &&\n      anyNA(suppressWarnings(as.numeric(as.character(stats::na.omit(x)))))\n  ) {\n    preserve_levels <- FALSE\n  }\n\n  if (dummy_factors) {\n    out <- as.data.frame(stats::model.matrix(\n      ~x,\n      contrasts.arg = list(x = \"contr.treatment\")\n    ))\n    out[1] <- as.numeric(rowSums(out[2:ncol(out)]) == 0)\n\n    # insert back NA rows. if \"x\" had missing values, model.matrix() creates an\n    # array with only non-missing values, so some rows are missing. 
First, we\n    # need to now which rows are missing (na_values) and the length of the\n    # original vector (which will be the number of rows in the final data frame)\n\n    na_values <- which(is.na(x))\n    rows_x <- length(x)\n\n    if (any(na_values)) {\n      # iterate all missing values that have\n      for (i in seq_along(na_values)) {\n        # if the first observation was missing, add NA row and bind data frame\n        if (i == 1 && na_values[i] == 1) {\n          out <- rbind(NA, out)\n        } else if (na_values[i] == rows_x) {\n          # if the last observation was NA, add NA row to data frame\n          out <- rbind(out, NA)\n        } else {\n          # else, pick rows from beginning to current NA value, add NA,\n          # and rbind the remaining rows\n          out <- rbind(\n            out[1:(na_values[i] - 1), ],\n            NA,\n            out[na_values[i]:nrow(out), ]\n          )\n        }\n      }\n      rownames(out) <- NULL\n    }\n    names(out) <- levels(x)\n  } else if (preserve_levels) {\n    if (is.unsorted(levels(x))) {\n      x_inverse <- rep(NA_real_, length(x))\n      for (i in 1:nlevels(x)) {\n        x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[\n          nlevels(x) - i + 1\n        ])\n      }\n      x <- factor(x_inverse)\n    }\n    out <- .set_back_labels(\n      as.numeric(as.character(x)),\n      x,\n      reverse_values = FALSE\n    )\n  } else {\n    out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE)\n  }\n\n  # shift to requested starting value\n  if (!is.null(lowest)) {\n    difference <- min(out) - lowest\n    out <- out - difference\n  }\n\n  out\n}\n\n\n#' @export\nto_numeric.character <- function(\n  x,\n  dummy_factors = FALSE,\n  lowest = NULL,\n  verbose = TRUE,\n  ...\n) {\n  numbers <- vapply(\n    x,\n    function(i) {\n      element <- tryCatch(str2lang(i), error = function(e) NULL)\n      !is.null(element) && is.numeric(element)\n    },\n    FUN.VALUE = logical(1L)\n  )\n  
if (all(numbers)) {\n    out <- as.numeric(vapply(x, str2lang, FUN.VALUE = numeric(1L)))\n  } else {\n    out <- to_numeric(as.factor(x), dummy_factors = dummy_factors)\n  }\n\n  # shift to requested starting value\n  if (!is.null(lowest)) {\n    difference <- min(out) - lowest\n    out <- out - difference\n  }\n\n  out\n}\n\n\n#' Convert to Numeric (if possible)\n#'\n#' Tries to convert vector to numeric if possible (if no warnings or errors).\n#' Otherwise, leaves it as is.\n#'\n#' @param x A vector to be converted.\n#'\n#' @examples\n#' coerce_to_numeric(c(\"1\", \"2\"))\n#' coerce_to_numeric(c(\"1\", \"2\", \"A\"))\n#' @return Numeric vector (if possible)\n#' @export\ncoerce_to_numeric <- function(x) {\n  tryCatch(\n    as.numeric(as.character(x)),\n    error = function(e) x,\n    warning = function(w) x\n  )\n}\n"
  },
  {
    "path": "R/unnormalize.R",
    "content": "#' @rdname normalize\n#' @export\nunnormalize <- function(x, ...) {\n  UseMethod(\"unnormalize\")\n}\n\n\n#' @export\nunnormalize.default <- function(x, ...) {\n  insight::format_error(\n    \"Variables of class '\",\n    class(x)[1],\n    \"' can't be unnormalized.\"\n  )\n}\n\n\n#' @rdname normalize\n#' @export\nunnormalize.numeric <- function(x, verbose = TRUE, ...) {\n  ## TODO implement algorithm include_bounds = FALSE\n\n  # if function called from the \"grouped_df\" method, we use the dw_transformer\n  # attributes that were recovered in the \"grouped_df\" method\n\n  dots <- match.call(expand.dots = FALSE)[[\"...\"]]\n  grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))\n\n  if (is.null(grp_attr_dw)) {\n    include_bounds <- attr(x, \"include_bounds\")\n    min_value <- attr(x, \"min_value\")\n    range_difference <- attr(x, \"range_difference\")\n    to_range <- attr(x, \"to_range\")\n  } else {\n    names(grp_attr_dw) <- gsub(\".*\\\\.\", \"\", names(grp_attr_dw))\n    include_bounds <- grp_attr_dw[\"include_bounds\"]\n    min_value <- grp_attr_dw[\"min_value\"]\n    range_difference <- grp_attr_dw[\"range_difference\"]\n    to_range <- grp_attr_dw[\"to_range\"]\n    if (is.na(to_range)) {\n      to_range <- NULL\n    }\n  }\n\n  if (is.null(min_value) || is.null(range_difference)) {\n    if (verbose) {\n      insight::format_error(\n        \"Can't unnormalize variable. 
Information about range and/or minimum value is missing.\"\n      )\n    }\n    return(x)\n  }\n\n  if (is.null(to_range)) {\n    x * range_difference + min_value\n  } else {\n    (x - to_range[1]) * (range_difference / diff(to_range)) + min_value\n  }\n}\n\n\n#' @rdname normalize\n#' @export\nunnormalize.data.frame <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  # if function called from the \"grouped_df\" method, we use the dw_transformer\n  # attributes that were recovered in the \"grouped_df\" method\n\n  dots <- match.call(expand.dots = FALSE)[[\"...\"]]\n\n  if (is.null(dots$grp_attr_dw)) {\n    grp_attr_dw <- NULL\n  } else {\n    grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))\n  }\n\n  for (i in select) {\n    var_attr <- grep(paste0(\"^attr\\\\_\", i, \"\\\\.\"), names(grp_attr_dw))\n    attrs <- grp_attr_dw[var_attr]\n    x[[i]] <- unnormalize(x[[i]], verbose = verbose, grp_attr_dw = attrs)\n  }\n\n  x\n}\n\n#' @rdname normalize\n#' @export\nunnormalize.grouped_df <- function(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = TRUE,\n    verbose = verbose\n  )\n\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  x <- as.data.frame(x)\n\n  for (i in select) {\n    if (is.null(info$groups[[paste0(\"attr_\", i)]])) {\n      insight::format_error(\n        paste(\n          \"Couldn't retrieve the necessary information to unnormalize\",\n          text_concatenate(i, enclose = \"`\")\n        )\n      )\n    
}\n  }\n  for (rows in seq_along(grps)) {\n    # get the dw_transformer attributes for this group\n    raw_attrs <- unlist(info$groups[\n      rows,\n      startsWith(names(info$groups), \"attr\")\n    ])\n    if (length(select) == 1L) {\n      names(raw_attrs) <- paste0(\"attr_\", select, \".\", names(raw_attrs))\n    }\n\n    tmp <- unnormalize(\n      x[grps[[rows]], , drop = FALSE],\n      select = select,\n      exclude = exclude,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      grp_attr_dw = raw_attrs\n    )\n    x[grps[[rows]], ] <- tmp\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- utils::modifyList(info, attributes(x))\n  class(x) <- c(\"grouped_df\", class(x))\n  x\n}\n"
  },
  {
    "path": "R/unstandardize.R",
    "content": "#' @rdname standardize\n#' @export\nunstandardize <- function(x, ...) {\n  UseMethod(\"unstandardize\")\n}\n\n#' @rdname standardize\n#' @export\nunstandardise <- unstandardize\n\n#' @rdname standardize\n#' @export\nunstandardize.numeric <- function(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  ...\n) {\n  if (!is.null(reference)) {\n    if (robust) {\n      center <- stats::median(reference, na.rm = TRUE)\n      scale <- stats::mad(reference, na.rm = TRUE)\n    } else {\n      center <- mean(reference, na.rm = TRUE)\n      scale <- stats::sd(reference, na.rm = TRUE)\n    }\n  } else if (is.null(center) || is.null(scale)) {\n    if (all(c(\"center\", \"scale\") %in% names(attributes(x)))) {\n      center <- attr(x, \"center\", exact = TRUE)\n      scale <- attr(x, \"scale\", exact = TRUE)\n      attr(x, \"scale\") <- attr(x, \"center\") <- NULL\n    } else if (\n      all(c(\"scaled:center\", \"scaled:scale\") %in% names(attributes(x)))\n    ) {\n      center <- attr(x, \"scaled:center\", exact = TRUE)\n      scale <- attr(x, \"scaled:scale\", exact = TRUE)\n      attr(x, \"scaled:scale\") <- attr(x, \"scaled:center\") <- NULL\n    } else {\n      insight::format_error(\n        \"You must provide the arguments `center`, `scale` or `reference`.\"\n      )\n    }\n  }\n\n  if (two_sd) {\n    scale <- 2 * scale\n  }\n\n  x * scale + center\n}\n\n#' @rdname standardize\n#' @export\nunstandardize.data.frame <- function(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # Select and deselect\n  cols <- .select_nse(\n    select,\n    x,\n    exclude = exclude,\n    ignore_case,\n    regex = regex,\n    verbose = verbose\n  )\n\n  dots <- match.call(expand.dots = FALSE)[[\"...\"]]\n\n  if (is.null(dots$grp_attr_dw)) {\n    grp_attr_dw <- 
NULL\n  } else {\n    grp_attr_dw <- eval(dots$grp_attr_dw, envir = parent.frame(1L))\n  }\n\n  if (!is.null(grp_attr_dw)) {\n    center <- vapply(\n      cols,\n      function(x) {\n        grp_attr_dw[grep(\n          paste0(\"^attr\\\\_\", x, \"\\\\.center\"),\n          names(grp_attr_dw)\n        )]\n      },\n      FUN.VALUE = numeric(1L)\n    )\n    scale <- vapply(\n      cols,\n      function(x) {\n        grp_attr_dw[grep(paste0(\"^attr\\\\_\", x, \"\\\\.scale\"), names(grp_attr_dw))]\n      },\n      FUN.VALUE = numeric(1L)\n    )\n    i <- vapply(x[, cols, drop = FALSE], is.numeric, FUN.VALUE = logical(1L))\n  } else if (!is.null(reference)) {\n    i <- vapply(x[, cols, drop = FALSE], is.numeric, FUN.VALUE = logical(1L))\n    i <- i[i]\n    reference <- reference[names(i)]\n    if (robust) {\n      center <- vapply(\n        reference,\n        FUN.VALUE = numeric(1L),\n        stats::median,\n        na.rm = TRUE\n      )\n      scale <- vapply(\n        reference,\n        FUN.VALUE = numeric(1L),\n        stats::mad,\n        na.rm = TRUE\n      )\n    } else {\n      center <- vapply(reference, FUN.VALUE = numeric(1L), mean, na.rm = TRUE)\n      scale <- vapply(\n        reference,\n        FUN.VALUE = numeric(1L),\n        stats::sd,\n        na.rm = TRUE\n      )\n    }\n  } else if (is.null(center) || is.null(scale)) {\n    i <- vapply(\n      x[, cols, drop = FALSE],\n      function(k) {\n        a <- attributes(k)\n        is.numeric(k) && !is.null(a) && all(c(\"scale\", \"center\") %in% names(a))\n      },\n      FUN.VALUE = logical(1L)\n    )\n\n    if (any(i)) {\n      i <- i[i]\n      center <- vapply(\n        x[names(i)],\n        FUN.VALUE = numeric(1L),\n        attr,\n        \"center\",\n        exact = TRUE\n      )\n      scale <- vapply(\n        x[names(i)],\n        FUN.VALUE = numeric(1L),\n        attr,\n        \"scale\",\n        exact = TRUE\n      )\n    } else if (all(c(\"center\", \"scale\") %in% names(attributes(x)))) 
{\n      center <- attr(x, \"center\", exact = TRUE)\n      scale <- attr(x, \"scale\", exact = TRUE)\n      attr(x, \"center\") <- attr(x, \"scale\") <- NULL\n      i <- names(x) %in% names(scale)\n      i <- i[i]\n    } else {\n      insight::format_error(\n        \"You must provide the arguments `center`, `scale` or `reference`.\"\n      )\n    }\n  } else {\n    if (is.null(names(center))) {\n      i <- vapply(x, is.numeric, FUN.VALUE = logical(1L))\n      names(center) <- names(scale) <- names(x[i])\n    }\n\n    i <- names(x) %in% names(center)\n    names(i) <- names(x)\n    i <- i[i]\n  }\n\n  if (two_sd) {\n    scale <- 2 * scale\n  }\n\n  cols <- names(i)\n\n  # Apply unstandardization to cols\n  for (col in cols) {\n    x[col] <- unstandardize(\n      x[[col]],\n      center = center[[col]],\n      scale = scale[[col]]\n    )\n  }\n  x\n}\n\n#' @export\nunstandardize.factor <- function(x, ...) {\n  x\n}\n\n#' @export\nunstandardize.character <- function(x, ...) {\n  x\n}\n\n\n#' @export\nunstandardize.grouped_df <- function(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  # evaluate select/exclude, may be select-helpers\n  select <- .select_nse(\n    select,\n    x,\n    exclude,\n    ignore_case,\n    regex = regex,\n    remove_group_var = TRUE,\n    verbose = verbose\n  )\n\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  x <- as.data.frame(x)\n\n  for (i in select) {\n    if (is.null(info$groups[[paste0(\"attr_\", i)]])) {\n      insight::format_error(\n        paste(\n          \"Couldn't retrieve the necessary information to unstandardize\",\n          text_concatenate(i, enclose = \"`\")\n        )\n      )\n    }\n  }\n\n  for (rows in seq_along(grps)) {\n    # get the dw_transformer attributes for this group\n    raw_attrs <- 
unlist(info$groups[\n      rows,\n      startsWith(names(info$groups), \"attr\")\n    ])\n    if (length(select) == 1L) {\n      names(raw_attrs) <- paste0(\"attr_\", select, \".\", names(raw_attrs))\n    }\n\n    tmp <- unstandardise(\n      x[grps[[rows]], , drop = FALSE],\n      center = center,\n      scale = scale,\n      reference = reference,\n      robust = robust,\n      two_sd = two_sd,\n      select = select,\n      exclude = exclude,\n      ignore_case = ignore_case,\n      regex = regex,\n      verbose = verbose,\n      grp_attr_dw = raw_attrs\n    )\n    x[grps[[rows]], ] <- tmp\n  }\n  # set back class, so data frame still works with dplyr\n  attributes(x) <- utils::modifyList(info, attributes(x))\n  class(x) <- c(\"grouped_df\", class(x))\n  x\n}\n\n#' @export\nunstandardize.matrix <- function(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  ...\n) {\n  if (all(c(\"scaled:center\", \"scaled:scale\") %in% names(attributes(x)))) {\n    center <- attr(x, \"scaled:center\", exact = TRUE)\n    scale <- attr(x, \"scaled:scale\", exact = TRUE)\n    attr(x, \"scaled:center\") <- attr(x, \"scaled:scale\") <- NULL\n\n    for (col in seq_len(ncol(x))) {\n      x[, col] <- unstandardize.numeric(\n        x[, col],\n        center = center[col],\n        scale = scale[col]\n      )\n    }\n  } else {\n    scales <- attr(x, \"scale\")\n    centers <- attr(x, \"center\")\n\n    xl <- lapply(seq_len(ncol(x)), function(i) {\n      tmp <- x[, i]\n      attributes(tmp) <- list(center = centers[i], scale = scales[i])\n      tmp\n    })\n\n    xz <- lapply(xl, datawizard::unstandardize, ...)\n    x_out <- do.call(cbind, xz)\n    dimnames(x_out) <- dimnames(x)\n\n    x <- x_out\n  }\n  x\n}\n\n#' @export\nunstandardize.array <- unstandardize.matrix\n\n\n# Datagrid ----------------------------------------------------------------\n\n#' @export\nunstandardize.datagrid <- function(x, ...) 
{\n  x[names(x)] <- unstandardize(\n    as.data.frame(x),\n    reference = attributes(x)$data,\n    ...\n  )\n  x\n}\n\n#' @export\nunstandardize.visualisation_matrix <- unstandardize.datagrid\n"
  },
  {
    "path": "R/utils-cols.R",
    "content": "#' Tools for working with column names\n#'\n#' @param x A data frame.\n#' @param row Row to use as column names.\n#' @param na_prefix Prefix to give to the column name if the row has an `NA`.\n#' Default is 'x', and it will be incremented at each `NA` (`x1`, `x2`, etc.).\n#' @param verbose Toggle warnings.\n#' @param prefix Prefix to give to the column name. Default is 'x', and it will\n#' be incremented at each column (`x1`, `x2`, etc.).\n#'\n#' @return\n#' `row_to_colnames()` and `colnames_to_row()` both return a data frame.\n#'\n#' @rdname colnames\n#'\n#' @export\n#'\n#' @examples\n#' # Convert a row to column names --------------------------------\n#' test <- data.frame(\n#'   a = c(\"iso\", 2, 5),\n#'   b = c(\"year\", 3, 6),\n#'   c = c(\"value\", 5, 7)\n#' )\n#' test\n#' row_to_colnames(test)\n#'\n#' # Convert column names to row --------------------------------\n#' test <- data.frame(\n#'   ARG = c(\"BRA\", \"FRA\"),\n#'   `1960` = c(1960, 1960),\n#'   `2000` = c(2000, 2000)\n#' )\n#' test\n#' colnames_to_row(test)\n#'\nrow_to_colnames <- function(x, row = 1, na_prefix = \"x\", verbose = TRUE) {\n  if (!is.numeric(row)) {\n    insight::format_error(\"Argument `row` must be of type numeric.\")\n  }\n  if (length(row) != 1) {\n    insight::format_error(\"Argument `row` must be of length 1.\")\n  }\n  if (nrow(x) < row) {\n    insight::format_error(\n      paste0(\n        \"You used row = \",\n        row,\n        \" but the dataset only has \",\n        nrow(x),\n        \" rows.\"\n      )\n    )\n  }\n\n  new_colnames <- as.character(unlist(x[row, ], use.names = FALSE))\n\n  # Create default colnames if there are NAs in the row used\n  which_na <- which(is.na(new_colnames))\n  n_na <- length(which_na)\n  if (n_na > 0) {\n    for (i in seq_along(which_na)) {\n      new_colnames[which_na[i]] <- paste0(na_prefix, i)\n    }\n    if (verbose) {\n      insight::format_warning(\n        paste0(\n          \"Some values of row \",\n          
row,\n          \" were NAs. The corresponding column names are prefixed with `\",\n          na_prefix,\n          \"`.\"\n        )\n      )\n    }\n  }\n  colnames(x) <- new_colnames\n  x[-row, ]\n}\n\n\n#' @rdname colnames\n#' @export\ncolnames_to_row <- function(x, prefix = \"x\") {\n  if (length(prefix) != 1) {\n    insight::format_error(\"Argument `prefix` must be of length 1.\")\n  }\n  if (!is.character(prefix)) {\n    insight::format_error(\"Argument `prefix` must be of type character.\")\n  }\n  x2 <- rbind(colnames(x), x)\n  colnames(x2) <- paste0(prefix, seq_len(ncol(x2)))\n  x2\n}\n"
  },
  {
    "path": "R/utils-rows.R",
    "content": "#' Tools for working with row names or row ids\n#'\n#' @param x A data frame.\n#' @param var Name of column to use for row names/ids. For `column_as_rownames()`,\n#'   this argument can be the variable name or the column number. For\n#'   `rownames_as_column()` and `rowid_as_column()`, the column name must not\n#'   already exist in the data.\n#'\n#' @details\n#' These are similar to `tibble`'s functions `column_to_rownames()`,\n#' `rownames_to_column()` and `rowid_to_column()`. Note that the behavior of\n#' `rowid_as_column()` is different for grouped dataframe: instead of making\n#' the rowid unique across the full dataframe, it creates rowid per group.\n#' Therefore, there can be several rows with the same rowid if they belong to\n#' different groups.\n#'\n#' If you are familiar with `dplyr`, this is similar to doing the following:\n#' ```r\n#' data |>\n#'   group_by(grp) |>\n#'   mutate(id = row_number()) |>\n#'   ungroup()\n#' ```\n#'\n#' @return\n#' A data frame.\n#'\n#' @rdname rownames\n#'\n#' @examples\n#' # Convert between row names and column --------------------------------\n#' test <- rownames_as_column(mtcars, var = \"car\")\n#' test\n#' head(column_as_rownames(test, var = \"car\"))\n#'\n#' @export\nrownames_as_column <- function(x, var = \"rowname\") {\n  if (!insight::object_has_rownames(x)) {\n    insight::format_error(\"The data frame doesn't have rownames.\")\n  }\n  if (is.null(var)) {\n    var <- \"rowname\"\n  }\n  if (!is.character(var)) {\n    insight::format_error(\"Argument 'var' must be of type character.\")\n  }\n  if (var %in% colnames(x)) {\n    insight::format_error(\n      paste0(\"There is already a variable named `\", var, \"` in your dataset.\")\n    )\n  }\n  original_x <- x\n  rn <- data.frame(rn = rownames(x), stringsAsFactors = FALSE)\n  x <- cbind(rn, x)\n  colnames(x)[1] <- var\n  rownames(x) <- NULL\n  x <- .replace_attrs(x, attributes(original_x))\n  x\n}\n\n#' @rdname rownames\n#' 
@export\ncolumn_as_rownames <- function(x, var = \"rowname\") {\n  if (!is.character(var) && !is.numeric(var)) {\n    insight::format_error(\n      \"Argument `var` must be of type character or numeric.\"\n    )\n  }\n  if (is.character(var) && !var %in% names(x)) {\n    insight::format_error(paste0(\n      \"Variable \\\"\",\n      var,\n      \"\\\" is not in the data frame.\"\n    ))\n  }\n  if (is.numeric(var) && (var > ncol(x) || var <= 0)) {\n    insight::format_error(\n      \"Column \",\n      var,\n      \" does not exist. There are \",\n      ncol(x),\n      \" columns in the data frame.\"\n    )\n  }\n\n  original_x <- x\n  rownames(x) <- x[[var]]\n  x[[var]] <- NULL\n  x <- .replace_attrs(x, attributes(original_x))\n  x\n}\n\n\n#' @rdname rownames\n#' @export\n#' @examples\n#' test_data <- head(iris)\n#'\n#' rowid_as_column(test_data)\n#' rowid_as_column(test_data, var = \"my_id\")\nrowid_as_column <- function(x, var = \"rowid\") {\n  UseMethod(\"rowid_as_column\")\n}\n\n#' @export\nrowid_as_column.default <- function(x, var = \"rowid\") {\n  if (is.null(var)) {\n    var <- \"rowid\"\n  }\n  if (!is.character(var)) {\n    insight::format_error(\"Argument 'var' must be of type character.\")\n  }\n  if (var %in% colnames(x)) {\n    insight::format_error(\n      paste0(\"There is already a variable named `\", var, \"` in your dataset.\")\n    )\n  }\n  original_x <- x\n  rn <- data.frame(rn = seq_len(nrow(x)), stringsAsFactors = FALSE)\n  x <- cbind(rn, x)\n  colnames(x)[1] <- var\n  rownames(x) <- NULL\n  x <- .replace_attrs(x, attributes(original_x))\n  x\n}\n\n\n#' @export\nrowid_as_column.grouped_df <- function(x, var = \"rowid\") {\n  if (!is.character(var)) {\n    insight::format_error(\"Argument 'var' must be of type character.\")\n  }\n  if (var %in% colnames(x)) {\n    insight::format_error(\n      paste0(\"There is already a variable named `\", var, \"` in your dataset.\")\n    )\n  }\n\n  grps <- attr(x, \"groups\", exact = TRUE)\n  grps <- 
grps[[\".rows\"]]\n\n  for (i in seq_along(grps)) {\n    x[grps[[i]], var] <- seq_along(grps[[i]])\n  }\n\n  # can't just put select = \"var\" because there could be another variable\n  # called var\n  x <- data_relocate(x, paste0(\"^\", var, \"$\"), regex = TRUE, before = 1)\n\n  x\n}\n"
  },
  {
    "path": "R/utils.R",
    "content": "#' @keywords internal\n.get_model_info <- function(model, model_info = NULL, ...) {\n  if (is.null(model_info)) {\n    model_info <- insight::model_info(model)\n  }\n\n  model_info\n}\n\n#' `NULL` coalescing operator\n#'\n#' @keywords internal\n#' @noRd\n`%||%` <- function(x, y) {\n  if (is.null(x)) y else x\n}\n\n\n#' Try to convert object to a dataframe\n#'\n#' @keywords internal\n#' @noRd\n.coerce_to_dataframe <- function(data) {\n  if (!is.data.frame(data)) {\n    data <- tryCatch(\n      as.data.frame(data, stringsAsFactors = FALSE),\n      error = function(e) {\n        insight::format_error(\n          \"`data` must be a data frame, or an object that can be coerced to a data frame.\"\n        )\n      }\n    )\n  }\n  data\n}\n\n\n#' Checks dataframes for syntactically valid column names\n#' Argument \"action\" can be \"warning\", \"error\", or \"message\".\n#'\n#' @keywords internal\n#' @noRd\n.check_dataframe_names <- function(x, action = \"warning\", verbose = TRUE) {\n  if (verbose && !all(colnames(x) == make.names(colnames(x), unique = TRUE))) {\n    insight::format_alert(\n      \"Bad column names (e.g., with spaces) have been detected which might create issues in many functions.\",\n      paste0(\n        \"We recommend to rename following columns: \",\n        text_concatenate(\n          colnames(x)[colnames(x) != make.names(colnames(x), unique = TRUE)],\n          enclose = \"`\"\n        )\n      ),\n      \"You can run `names(mydata) <- make.names(names(mydata))` or use `janitor::clean_names()` for a quick fix.\", # nolint\n      type = action\n    )\n  }\n}\n\n\n#' Fuzzy grep, matches pattern that are close, but not identical\n#' @examples\n#' colnames(iris)\n#' p <- sprintf(\"(%s){~%i}\", \"Spela\", 2)\n#' grep(pattern = p, x = colnames(iris), ignore.case = FALSE)\n#' @keywords internal\n#' @noRd\n.fuzzy_grep <- function(x, pattern, precision = NULL) {\n  if (is.null(precision)) {\n    precision <- round(nchar(pattern) / 3)\n  
}\n  if (precision > nchar(pattern)) {\n    return(NULL)\n  }\n  p <- sprintf(\"(%s){~%i}\", pattern, precision)\n  grep(pattern = p, x = x, ignore.case = FALSE)\n}\n\n\n#' create a message string to tell user about matches that could possibly\n#' be the string they were looking for\n#'\n#' @keywords internal\n#' @noRd\n.misspelled_string <- function(source, searchterm, default_message = NULL) {\n  if (is.null(searchterm) || length(searchterm) < 1) {\n    return(default_message)\n  }\n  # used for many matches\n  more_found <- \"\"\n  # init default\n  msg <- \"\"\n  # guess the misspelled string\n  possible_strings <- unlist(\n    lapply(searchterm, function(s) {\n      source[.fuzzy_grep(source, s)] # nolint\n    }),\n    use.names = FALSE\n  )\n  if (length(possible_strings)) {\n    msg <- \"Did you mean \"\n    if (length(possible_strings) > 1) {\n      # make sure we don't print dozens of alternatives for larger data frames\n      if (length(possible_strings) > 5) {\n        more_found <- sprintf(\n          \" We even found %i more possible matches, not shown here.\",\n          length(possible_strings) - 5\n        )\n        possible_strings <- possible_strings[1:5]\n      }\n      msg <- paste0(\n        msg,\n        \"one of \",\n        text_concatenate(possible_strings, enclose = \"\\\"\", last = \" or \")\n      )\n    } else {\n      msg <- paste0(msg, \"\\\"\", possible_strings, \"\\\"\")\n    }\n    msg <- paste0(msg, \"?\", more_found)\n  } else {\n    msg <- default_message\n  }\n  # no double white space\n  insight::trim_ws(msg)\n}\n\n\n#' Check that a vector is sorted\n#' @noRd\n#' @keywords internal\n.is_sorted <- Negate(is.unsorted)\n\n\n#' Replace only custom attributes\n#'\n#' Using \"attributes(out) <- attributes(data)\" or similar doesn't work so well\n#' for big datasets because it takes some time to attribute the row names.\n#'\n#' This function gives only custom attributes to the new dataset.\n#' @noRd\n#' @keywords 
internal\n.replace_attrs <- function(data, custom_attr) {\n  for (nm in setdiff(names(custom_attr), names(attributes(data.frame())))) {\n    attr(data, which = nm) <- custom_attr[[nm]]\n  }\n  data\n}\n\n\n#' @keywords internal\n.is_date <- function(x) {\n  inherits(x, \"Date\")\n}\n\n\n#' @keywords internal\n.are_weights <- function(w) {\n  !is.null(w) && length(w) && !all(w == 1) && !all(w == w[1])\n}\n\n\n#' @keywords internal\n.factor_to_numeric <- function(x) {\n  # no need to change for numeric\n  if (is.numeric(x)) {\n    return(x)\n  }\n\n  # Dates can be coerced by as.numeric(), w/o as.character()\n  if (inherits(x, \"Date\")) {\n    return(as.numeric(x))\n  }\n\n  # Logicals should be 0/1\n  if (is.logical(x)) {\n    return(as.numeric(x))\n  }\n\n  if (anyNA(suppressWarnings(as.numeric(as.character(stats::na.omit(x)))))) {\n    if (is.character(x)) {\n      x <- as.factor(x)\n    }\n    levels(x) <- 1:nlevels(x)\n  }\n\n  as.numeric(as.character(x))\n}\n\n\n# For standardize_parameters ----------------------------------------------\n\n#' Taken from https://github.com/coolbutuseless/gluestick (licence: MIT)\n#' Same functionality as `{glue}`\n#'\n#' @noRd\n#' @keywords internal\n.gluestick <- function(\n  fmt,\n  src = parent.frame(),\n  open = \"{\",\n  close = \"}\",\n  eval = TRUE\n) {\n  nchar_open <- nchar(open)\n  nchar_close <- nchar(close)\n\n  # validation checks\n  stopifnot(exprs = {\n    is.character(fmt)\n    length(fmt) == 1L\n    is.character(open)\n    length(open) == 1L\n    nchar_open > 0L\n    is.character(close)\n    length(close) == 1\n    nchar_close > 0\n  })\n\n  # Brute force the open/close characters into a regular expression for\n  # extracting the expressions from the format string\n  open <- gsub(\"(.)\", \"\\\\\\\\\\\\1\", open) # Escape everything!!\n  close <- gsub(\"(.)\", \"\\\\\\\\\\\\1\", close) # Escape everything!!\n  re <- paste0(open, \".*?\", close)\n\n  # Extract the delimited expressions\n  matches <- gregexpr(re, 
fmt)\n  exprs <- regmatches(fmt, matches)[[1]]\n\n  # Remove the delimiters\n  exprs <- substr(exprs, nchar_open + 1L, nchar(exprs) - nchar_close)\n\n  # create a valid sprintf fmt string.\n  #  - replace all \"{expr}\" strings with \"%s\"\n  #  - escape any '%' so sprintf() doesn't try and use them for formatting\n  #    but only if the '%' is NOT followed by an 's'\n  #\n  # gluestick() doesn't deal with any pathological cases\n  fmt_sprintf <- gsub(re, \"%s\", fmt)\n  fmt_sprintf <- gsub(\"%(?!s)\", \"%%\", fmt_sprintf, perl = TRUE)\n\n  # Evaluate\n  if (eval) {\n    fun_args <- lapply(exprs, function(expr) {\n      eval(parse(text = expr), envir = src)\n    })\n  } else {\n    fun_args <- unname(mget(exprs, envir = as.environment(src)))\n  }\n\n  # Create the string(s)\n  do.call(sprintf, c(list(fmt_sprintf), fun_args))\n}\n\n\n#' help-functions\n#' @keywords internal\n#' @noRd\n.data_frame <- function(...) {\n  x <- data.frame(..., stringsAsFactors = FALSE)\n  rownames(x) <- NULL\n  x\n}\n"
  },
  {
    "path": "R/utils_labels.R",
    "content": "# after data transformation, label attributes get lost. This function\n# extracts label attributes from the original vector and adds them back\n# to the transformed vector\n\n#' @keywords internal\n.set_back_labels <- function(\n  new,\n  old,\n  include_values = TRUE,\n  reverse_values = FALSE\n) {\n  # labelled data?\n  attr(new, \"label\") <- attr(old, \"label\", exact = TRUE)\n  value_labels <- attr(old, \"labels\", exact = TRUE)\n  # \"include_values\" is used to preserve value labels\n  if (isTRUE(include_values) && !is.null(value_labels)) {\n    if (reverse_values) {\n      # reverse values? Used for \"reverse_scale()\"\n      attr(new, \"labels\") <- stats::setNames(\n        rev(value_labels),\n        names(value_labels)\n      )\n    } else if (is.numeric(new)) {\n      # keep value order? Used for \"to_numeric()\"\n      if (any(grepl(\"[^0-9]\", value_labels))) {\n        # if we have any non-numeric characters, convert to numeric\n        attr(new, \"labels\") <- stats::setNames(\n          as.numeric(as.factor(value_labels)),\n          names(value_labels)\n        )\n      } else {\n        # if we have numeric, or \"numeric character\" (like \"1\", \"2\", \"3\" etc.)\n        attr(new, \"labels\") <- stats::setNames(\n          as.numeric(value_labels),\n          names(value_labels)\n        )\n      }\n    } else {\n      attr(new, \"labels\") <- stats::setNames(value_labels, names(value_labels))\n    }\n  } else if (isFALSE(include_values)) {\n    attr(new, \"labels\") <- NULL\n  }\n  new\n}\n\n\n# This function converts value labels that are saved as attributes\n# into factor levels\n\n#' @keywords internal\n.value_labels_to_levels <- function(x, verbose = TRUE, ...) 
{\n  # extract value labels\n  value_labels <- attr(x, \"labels\", exact = TRUE)\n  # return, if none\n  if (is.null(value_labels)) {\n    return(x)\n  }\n  # check positions of matching values and levels\n  levels_in_labs <- stats::na.omit(match(value_labels, levels(x)))\n  labs_in_levels <- stats::na.omit(match(levels(x), value_labels))\n  # validation check - if labelled values and levels don't match\n  if (!length(levels_in_labs) || !length(labs_in_levels)) {\n    if (verbose) {\n      insight::format_alert(\n        \"Could not use value labels as factor levels.\",\n        \"Labelled values and factor levels had no match.\"\n      )\n    }\n    return(x)\n  }\n  # check if all levels have matching labels, and if not, tell user\n  if (verbose && nlevels(x) != length(levels_in_labs)) {\n    insight::format_alert(\n      \"Not all factor levels had a matching value label. Non-matching levels were preserved.\"\n    )\n  }\n  # we need to find out which levels have no labelled value\n  missing_levels <- levels(x)[!levels(x) %in% value_labels]\n\n  # and we need to remove those value labels that don't have a matching level\n  value_labels <- value_labels[value_labels %in% levels(x)]\n\n  # for levels that have no label, we just keep the original factor level\n  value_labels <- c(\n    value_labels,\n    stats::setNames(missing_levels, missing_levels)\n  )\n\n  # now we can add back levels\n  levels(x) <- names(value_labels)[order(as.numeric(value_labels))]\n  attr(x, \"labels\") <- NULL\n\n  x\n}\n"
  },
  {
    "path": "R/utils_standardize_center.R",
    "content": "# preparation for standardize and center ----\n#\n# Performs some preparation when standardizing or centering variables,\n# like finding the center or scale, also in relation to some reference values.\n# This function is applied to *vectors*.\n#\n#' @keywords internal\n.process_std_center <- function(\n  x,\n  weights,\n  robust,\n  verbose = TRUE,\n  reference = NULL,\n  center = NULL,\n  scale = NULL\n) {\n  # Warning if all NaNs\n  if (all(is.na(x) | is.infinite(x))) {\n    return(NULL)\n  }\n\n  if (.are_weights(weights)) {\n    valid_x <- !is.na(x) &\n      !is.na(weights) &\n      !is.infinite(x) &\n      !is.infinite(weights)\n    na_values <- is.na(x) | is.na(weights)\n    inf_values <- is.infinite(x) | is.infinite(weights)\n    vals <- x[valid_x]\n    weights <- weights[valid_x]\n  } else {\n    valid_x <- !is.na(x) & !is.infinite(x)\n    na_values <- is.na(x)\n    inf_values <- is.infinite(x)\n    vals <- x[valid_x]\n  }\n\n  # validation checks\n  check <- .check_standardize_numeric(\n    x,\n    name = NULL,\n    verbose = verbose,\n    reference = reference,\n    center = center\n  )\n\n  if (is.factor(vals) || is.character(vals)) {\n    vals <- .factor_to_numeric(vals)\n  }\n\n  # Get center and scale\n  ref <- .get_center_scale(\n    vals,\n    robust,\n    weights,\n    reference,\n    .center = center,\n    .scale = scale,\n    verbose = verbose\n  )\n\n  list(\n    vals = vals,\n    valid_x = valid_x,\n    center = ref$center,\n    scale = ref$scale,\n    check = check,\n    na_values = na_values,\n    inf_values = inf_values\n  )\n}\n\n\n# processing and checking of arguments ----\n#\n# Performs some preparation when standardizing or centering variables,\n# like finding the center or scale, also in relation to some reference values.\n# This function is applied to the *data frame methods*.\n#\n#' @keywords internal\n.process_std_args <- function(\n  x,\n  select,\n  exclude,\n  weights,\n  append,\n  append_suffix = \"_z\",\n  
keep_factors,\n  remove_na = \"none\",\n  reference = NULL,\n  .center = NULL,\n  .scale = NULL,\n  keep_character = FALSE,\n  preserve_value_labels = FALSE\n) {\n  # check append argument, and set default\n  if (isFALSE(append)) {\n    append <- NULL\n  } else if (isTRUE(append)) {\n    append <- append_suffix\n  }\n\n  if (!is.null(weights) && is.character(weights)) {\n    if (weights %in% colnames(x)) {\n      exclude <- c(exclude, weights)\n    } else {\n      insight::format_warning(\n        paste0(\n          \"Could not find weighting column `\",\n          weights,\n          \"`. Weighting not carried out.\"\n        )\n      )\n      weights <- NULL\n    }\n  }\n\n  select <- .select_variables(x, select, exclude, keep_factors, keep_character)\n\n  # check if selected variables are in reference\n  if (!is.null(reference) && !all(select %in% names(reference))) {\n    insight::format_error(\n      \"The `reference` must include all variables from `select`.\"\n    )\n  }\n\n  # copy label attributes\n  variable_labels <- insight::compact_list(lapply(\n    x,\n    attr,\n    \"label\",\n    exact = TRUE\n  ))\n  value_labels <- NULL\n  if (preserve_value_labels) {\n    value_labels <- insight::compact_list(lapply(\n      x,\n      attr,\n      \"labels\",\n      exact = TRUE\n    ))\n  }\n\n  # drop NAs\n  remove_na <- match.arg(remove_na, c(\"none\", \"selected\", \"all\"))\n\n  omit <- switch(\n    remove_na,\n    none = logical(nrow(x)),\n    selected = rowSums(vapply(x[select], is.na, FUN.VALUE = logical(nrow(x)))) >\n      0,\n    all = rowSums(vapply(x, is.na, FUN.VALUE = logical(nrow(x)))) > 0\n  )\n  x <- x[!omit, , drop = FALSE]\n\n  if (!is.null(weights) && is.character(weights)) {\n    weights <- x[[weights]]\n  }\n\n  # append standardized variables\n  if (!is.null(append) && append != \"\") {\n    new_variables <- x[select]\n    colnames(new_variables) <- paste0(colnames(new_variables), append)\n    if (length(variable_labels)) {\n      
variable_labels <- c(\n        variable_labels,\n        stats::setNames(variable_labels[select], colnames(new_variables))\n      )\n    }\n    if (length(value_labels)) {\n      value_labels <- c(\n        value_labels,\n        stats::setNames(value_labels[select], colnames(new_variables))\n      )\n    }\n    x <- cbind(x, new_variables)\n    select <- colnames(new_variables)\n  }\n\n  # check for reference center and scale\n  if (!is.null(.center)) {\n    # for center(), we have no scale - set it to default value\n    if (is.null(.scale)) {\n      .scale <- rep(1, length(.center))\n    }\n\n    # center and scale must have same length\n    if (length(.center) != length(.scale)) {\n      insight::format_error(\"`center` and `scale` must be of same length.\")\n    }\n\n    # center and scale must either be of length 1 or of same length as selected variables\n    if (length(.center) > 1 && length(.center) != length(select)) {\n      insight::format_error(\n        \"`center` and `scale` must have the same length as the selected variables for standardization or centering.\"\n      )\n    }\n\n    # if of length 1, recycle\n    if (length(.center) == 1) {\n      .center <- rep(.center, length(select))\n      .scale <- rep(.scale, length(select))\n    }\n\n    # set names\n    if (is.null(names(.center))) {\n      .center <- stats::setNames(.center, select)\n    }\n    if (is.null(names(.scale))) {\n      .scale <- stats::setNames(.scale, select)\n    }\n  } else {\n    # use NA if missing, so we can index these as vectors\n    .center <- stats::setNames(rep(NA, length(select)), select)\n    .scale <- stats::setNames(rep(NA, length(select)), select)\n  }\n\n  # add back variable labels\n  if (length(variable_labels)) {\n    for (i in names(variable_labels)) {\n      attr(x[[i]], \"label\") <- variable_labels[[i]]\n    }\n  }\n\n  if (preserve_value_labels && length(value_labels)) {\n    for (i in names(value_labels)) {\n      attr(x[[i]], \"labels\") <- 
value_labels[[i]]\n    }\n  }\n\n  list(\n    x = x,\n    select = select,\n    exclude = exclude,\n    weights = weights,\n    append = append,\n    center = .center,\n    scale = .scale\n  )\n}\n\n\n# retrieve center and scale information ----\n#' @keywords internal\n.get_center_scale <- function(\n  x,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  .center = NULL,\n  .scale = NULL,\n  verbose = TRUE\n) {\n  if (is.null(reference)) {\n    reference <- x\n  }\n\n  # for center(), we have no scale. default to 1\n  if (is.null(.scale) || is.na(.scale) || isFALSE(.scale)) {\n    scale <- 1\n  } else if (isTRUE(.scale)) {\n    if (robust) {\n      scale <- weighted_mad(reference, weights)\n    } else {\n      scale <- weighted_sd(reference, weights)\n    }\n  } else {\n    # we must have a numeric value here\n    scale <- .scale\n  }\n\n  # process center\n  if (is.null(.center) || is.na(.center) || isFALSE(.center)) {\n    center <- 0\n  } else if (isTRUE(.center)) {\n    if (robust) {\n      center <- weighted_median(reference, weights)\n    } else {\n      center <- weighted_mean(reference, weights)\n    }\n  } else {\n    # we must have a numeric value here\n    center <- .center\n  }\n\n  if (scale == 0) {\n    scale <- 1\n    if (verbose) {\n      insight::format_warning(sprintf(\n        \"%s is 0 - variable not standardized (only scaled).\",\n        if (robust) \"MAD\" else \"SD\"\n      ))\n    }\n  }\n\n  list(center = center, scale = scale)\n}\n\n\n# check range of input variables ----\n#' @keywords internal\n.check_standardize_numeric <- function(\n  x,\n  name = NULL,\n  verbose = TRUE,\n  reference = NULL,\n  center\n) {\n  # Warning if only one value\n  if (\n    insight::has_single_value(x) &&\n      is.null(reference) &&\n      (is.null(center) || isTRUE(center))\n  ) {\n    if (verbose) {\n      if (is.null(name)) {\n        insight::format_alert(\n          \"The variable contains only one unique value and will be set to 0.\"\n       
 )\n      } else {\n        insight::format_alert(\n          paste0(\n            \"The variable `\",\n            name,\n            \"` contains only one unique value and will be set to 0.\"\n          )\n        )\n      }\n    }\n    return(NULL)\n  }\n\n  # Warning if logical vector\n  if (\n    verbose && insight::n_unique(x) == 2 && !is.factor(x) && !is.character(x)\n  ) {\n    if (is.null(name)) {\n      insight::format_alert(\n        \"The variable contains only two different values. Consider converting it to a factor.\"\n      )\n    } else {\n      insight::format_alert(\n        paste0(\n          \"Variable `\",\n          name,\n          \"` contains only two different values. Consider converting it to a factor.\"\n        )\n      )\n    }\n  }\n  x\n}\n\n\n# process append argument ----\n#' @keywords internal\n.process_append <- function(\n  x,\n  select,\n  append,\n  append_suffix = \"_z\",\n  preserve_value_labels = FALSE,\n  keep_factors = TRUE,\n  keep_character = FALSE\n) {\n  # check append argument, and set default\n  if (isFALSE(append)) {\n    append <- NULL\n  } else if (isTRUE(append)) {\n    append <- append_suffix\n  }\n\n  # append recoded variables\n  if (!is.null(append) && append != \"\") {\n    # keep or drop factors and characters\n    select <- .select_variables(\n      x,\n      select,\n      exclude = NULL,\n      keep_factors = keep_factors,\n      keep_character = keep_character\n    )\n\n    # copy label attributes\n    variable_labels <- insight::compact_list(lapply(\n      x,\n      attr,\n      \"label\",\n      exact = TRUE\n    ))\n    value_labels <- NULL\n    if (preserve_value_labels) {\n      value_labels <- insight::compact_list(lapply(\n        x,\n        attr,\n        \"labels\",\n        exact = TRUE\n      ))\n    }\n\n    # add new variables that should be appended\n    new_variables <- x[select]\n    colnames(new_variables) <- paste0(colnames(new_variables), append)\n    if (length(variable_labels)) {\n 
     variable_labels <- c(\n        variable_labels,\n        stats::setNames(variable_labels[select], colnames(new_variables))\n      )\n    }\n    if (length(value_labels)) {\n      value_labels <- c(\n        value_labels,\n        stats::setNames(value_labels[select], colnames(new_variables))\n      )\n    }\n    x <- cbind(x, new_variables)\n    select <- colnames(new_variables)\n\n    # add back variable labels\n    if (length(variable_labels)) {\n      for (i in names(variable_labels)) {\n        attr(x[[i]], \"label\") <- variable_labels[[i]]\n      }\n    }\n\n    if (preserve_value_labels && length(value_labels)) {\n      for (i in names(value_labels)) {\n        attr(x[[i]], \"labels\") <- value_labels[[i]]\n      }\n    }\n  }\n  list(x = x, select = select)\n}\n\n\n# variables to standardize and center ----\n#\n# This function mainly serves the purpose to keep or drop factors and\n# character vectors from transformation functions.\n#\n#' @keywords internal\n.select_variables <- function(\n  x,\n  select,\n  exclude,\n  keep_factors,\n  keep_character = FALSE\n) {\n  if (is.null(select)) {\n    select <- names(x)\n  }\n\n  if (!is.null(exclude)) {\n    select <- setdiff(select, exclude)\n  }\n\n  if (!keep_factors) {\n    if (!keep_character) {\n      factors <- vapply(\n        x[select],\n        function(i) is.factor(i) | is.character(i),\n        FUN.VALUE = logical(1L)\n      )\n    } else {\n      factors <- vapply(x[select], is.factor, FUN.VALUE = logical(1L))\n    }\n    select <- select[!factors]\n  }\n\n  select\n}\n\n\n# for grouped df ---------------------------\n#' @keywords internal\n.process_grouped_df <- function(\n  x,\n  select,\n  exclude,\n  append,\n  append_suffix = \"_z\",\n  reference,\n  weights,\n  keep_factors\n) {\n  if (!is.null(reference)) {\n    insight::format_error(\n      \"The `reference` argument cannot be used with grouped standardization for now.\"\n    )\n  }\n\n  # check append argument, and set default\n  if 
(isFALSE(append)) {\n    append <- NULL\n  } else if (isTRUE(append)) {\n    append <- append_suffix\n  }\n\n  info <- attributes(x)\n\n  grps <- attr(x, \"groups\", exact = TRUE)[[\".rows\"]]\n\n  # for grouped data frames, we can decide to remove group variable from selection\n  grp_vars <- setdiff(colnames(attr(x, \"groups\", exact = TRUE)), \".rows\")\n\n  if (is.numeric(weights)) {\n    insight::format_warning(\n      \"For grouped data frames, `weights` must be a character, not a numeric vector.\",\n      \"Ignoring weightings.\"\n    )\n    weights <- NULL\n  }\n\n  x <- as.data.frame(x)\n  select <- .select_variables(x, select, exclude, keep_factors)\n  select <- setdiff(select, grp_vars)\n\n  # append standardized variables\n  if (!is.null(append) && append != \"\") {\n    new_variables <- x[select]\n    colnames(new_variables) <- paste0(colnames(new_variables), append)\n    x <- cbind(x, new_variables)\n    select <- colnames(new_variables)\n    info$names <- c(info$names, select)\n  }\n\n  list(x = x, info = info, select = select, grps = grps, weights = weights)\n}\n"
  },
  {
    "path": "R/visualisation_recipe.R",
    "content": "#' Prepare objects for visualisation\n#'\n#' @description This function prepares objects for visualisation by returning a list of\n#' layers with data and geoms that can be easily plotted using for instance\n#' `ggplot2`.\n#'\n#' If the `see` package is installed, the call to `visualisation_recipe()` can be\n#' replaced by `plot()`, which will internally call the former and then plot it\n#' using `ggplot`. The resulting plot can be customized ad-hoc (by adding\n#' ggplot's geoms, theme or specifications), or via some of the arguments\n#' of `visualisation_recipe()` that control the aesthetic parameters.\n#'\n#' See the specific documentation page for your object's class:\n#'\n#'  - {modelbased}: <https://easystats.github.io/modelbased/reference/visualisation_recipe.estimate_predicted.html>\n#'  - {correlation}: <https://easystats.github.io/correlation/reference/visualisation_recipe.easycormatrix.html>\n#'\n#' @param x An `easystats` object.\n#' @param ... Other arguments passed to other functions.\n#'\n#' @export\nvisualisation_recipe <- function(x, ...) {\n  UseMethod(\"visualisation_recipe\")\n}\n\n\n#' @export\nprint.visualisation_recipe <- function(x, ...) 
{\n  for (i in seq_along(x)) {\n    l <- x[[paste0(\"l\", i)]]\n    insight::print_color(paste0(\"Layer \", i, \"\\n--------\\n\"), \"blue\")\n    insight::print_color(\n      paste0(\"Geom type: \", ifelse(is.null(l$geom), \"[NULL]\", l$geom), \"\\n\"),\n      \"yellow\"\n    )\n\n    elements <- names(l)[!vapply(l, is.null, FUN.VALUE = logical(1L))]\n\n    # Loop through all elements of list\n    for (element in elements[elements != \"geom\"]) {\n      # Print element name\n      if (element == \"aes\") {\n        cat(\"aes_string(\\n\")\n      } else {\n        cat(paste0(element, \" = \"))\n      }\n\n      # Print element\n      if (element == \"data\") {\n        cat(paste0(\"[\", paste0(dim(l$data), collapse = \" x \"), \"]\"))\n      } else if (element == \"aes\") {\n        for (aes in names(l$aes)) {\n          if (!is.null(l$aes[[aes]])) {\n            if (is.character(l$aes[[aes]])) {\n              cat(paste0(\"  \", aes, \" = '\", l$aes[[aes]], \"'\\n\"))\n            } else {\n              cat(paste0(\"  \", aes, \" = \", l$aes[[aes]], \"\\n\"))\n            }\n          }\n        }\n        cat(\")\")\n      } else {\n        if (\n          is.character(l[[element]]) ||\n            is.numeric(l[[element]]) ||\n            is.factor(l[[element]])\n        ) {\n          if (is.character(l[[element]])) {\n            cat(paste0(\"'\", l[[element]], \"'\"))\n          } else {\n            if (length(l[[element]]) == 1) {\n              cat(l[[element]])\n            } else {\n              cat(paste0(\"c(\", toString(l[[element]]), \")\"))\n            }\n          }\n        } else {\n          cat(paste0(\"class: \", class(l[[element]]), collapse = \"/\"))\n        }\n      }\n      cat(\"\\n\")\n    }\n    cat(\"\\n\")\n  }\n}\n\n\n#' @export\nplot.visualisation_recipe <- function(x, ...) {\n  insight::check_if_installed(\"see\")\n  NextMethod()\n}\n"
  },
  {
    "path": "R/weighted_mean_median_sd_mad.R",
    "content": "#' Weighted Mean, Median, SD, and MAD\n#'\n#' @inheritParams stats::weighted.mean\n#' @inheritParams stats::mad\n#' @param weights A numerical vector of weights the same length as `x` giving\n#' the weights to use for elements of `x`. If `weights = NULL`, `x` is passed\n#' to the non-weighted function.\n#' @param verbose Show warning when `weights` are negative?\n#' @param remove_na Logical, if `TRUE` (default), removes missing (`NA`) and infinite\n#' values from `x` and `weights`.\n#'\n#' @examples\n#' ## GPA from Siegel 1994\n#' x <- c(3.7, 3.3, 3.5, 2.8)\n#' wt <- c(5, 5, 4, 1) / 15\n#'\n#' weighted_mean(x, wt)\n#' weighted_median(x, wt)\n#'\n#' weighted_sd(x, wt)\n#' weighted_mad(x, wt)\n#'\n#' @export\nweighted_mean <- function(\n  x,\n  weights = NULL,\n  remove_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  if (!.are_weights(weights) || !.validate_weights(weights, verbose)) {\n    return(mean(x, na.rm = remove_na))\n  }\n\n  # remove missings\n  complete <- .clean_missings(x, weights, remove_na)\n  stats::weighted.mean(complete$x, complete$weights, na.rm = remove_na)\n}\n\n\n#' @export\n#' @rdname weighted_mean\nweighted_median <- function(\n  x,\n  weights = NULL,\n  remove_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  if (!.are_weights(weights) || !.validate_weights(weights, verbose)) {\n    return(stats::median(x, na.rm = remove_na))\n  }\n\n  p <- 0.5 # split probability\n\n  # remove missings\n  complete <- .clean_missings(x, weights, remove_na)\n\n  order <- order(complete$x)\n  x <- complete$x[order]\n  weights <- complete$weights[order]\n\n  rw <- cumsum(weights) / sum(weights)\n  # validation check\n  if (all(is.na(rw))) {\n    return(NA_real_)\n  }\n\n  md.values <- min(which(rw >= p))\n\n  if (rw[md.values] == p) {\n    q <- mean(x[md.values:(md.values + 1)])\n  } else {\n    q <- x[md.values]\n  }\n\n  q\n}\n\n\n#' @export\n#' @rdname weighted_mean\nweighted_sd <- function(\n  x,\n  weights = NULL,\n  remove_na = TRUE,\n  verbose = 
TRUE,\n  ...\n) {\n  # from cov.wt\n  if (!.are_weights(weights) || !.validate_weights(weights, verbose)) {\n    return(stats::sd(x, na.rm = remove_na))\n  }\n\n  # remove missings\n  complete <- .clean_missings(x, weights, remove_na)\n\n  weights1 <- complete$weights / sum(complete$weights)\n  center <- sum(weights1 * complete$x)\n  xc <- sqrt(weights1) * (complete$x - center)\n  var <- (t(xc) %*% xc) / (1 - sum(weights1^2))\n  sqrt(as.vector(var))\n}\n\n#' @export\n#' @rdname weighted_mean\nweighted_mad <- function(\n  x,\n  weights = NULL,\n  constant = 1.4826,\n  remove_na = TRUE,\n  verbose = TRUE,\n  ...\n) {\n  # From matrixStats\n  if (!.are_weights(weights) || !.validate_weights(weights, verbose)) {\n    return(stats::mad(x, na.rm = remove_na))\n  }\n\n  center <- weighted_median(x, weights = weights, remove_na = remove_na)\n  x <- abs(x - center)\n  constant * weighted_median(x, weights = weights, remove_na = remove_na)\n}\n\n\n# Utils -------------------------------------------------------------------\n\n.validate_weights <- function(weights, verbose = TRUE) {\n  pos <- all(weights > 0, na.rm = TRUE)\n\n  if (isTRUE(!pos) && isTRUE(verbose)) {\n    insight::format_warning(\n      \"Some `weights` were negative. Weighting not carried out.\"\n    )\n  }\n\n  pos\n}\n\n.clean_missings <- function(x, weights, remove_na) {\n  if (isTRUE(remove_na)) {\n    flag <- FALSE\n    if (any(is.infinite(x)) || any(is.infinite(weights))) {\n      # remove Inf\n      x[is.infinite(x)] <- NA\n      weights[is.infinite(weights)] <- NA\n      flag <- TRUE\n    }\n\n    if (anyNA(x) || anyNA(weights)) {\n      # remove missings\n      x[is.na(weights)] <- NA\n      weights[is.na(x)] <- NA\n      flag <- TRUE\n    }\n\n    if (flag) {\n      weights <- stats::na.omit(weights)\n      x <- stats::na.omit(x)\n    }\n  }\n\n  list(x = x, weights = weights)\n}\n"
  },
  {
    "path": "R/winsorize.R",
    "content": "#' Winsorize data\n#'\n#' @details\n#'\n#' Winsorizing or winsorization is the transformation of statistics by limiting\n#' extreme values in the statistical data to reduce the effect of possibly\n#' spurious outliers. The distribution of many statistics can be heavily\n#' influenced by outliers. A typical strategy is to set all outliers (values\n#' beyond a certain threshold) to a specified percentile of the data; for\n#' example, a `90%` winsorization would see all data below the 5th percentile set\n#' to the 5th percentile, and data above the 95th percentile set to the 95th\n#' percentile. Winsorized estimators are usually more robust to outliers than\n#' their more standard forms.\n#'\n#' @return\n#'\n#' A data frame with winsorized columns or a winsorized vector.\n#'\n#' @param data data frame or vector.\n#' @param threshold The amount of winsorization, depends on the value of `method`:\n#' - For `method = \"percentile\"`: the amount to winsorize from *each* tail.\n#'   The value of `threshold` must be between 0 and 0.5 and of length 1.\n#' - For `method = \"zscore\"`: the number of *SD*/*MAD*-deviations from the\n#'   *mean*/*median* (see `robust`). The value of `threshold` must be greater\n#'   than 0 and of length 1.\n#' - For `method = \"raw\"`: a vector of length 2 with the lower and upper bound\n#'   for winsorization.\n#' @param method One of \"percentile\" (default), \"zscore\", or \"raw\".\n#' @param robust Logical, if TRUE, winsorizing through the \"zscore\" method is\n#'   done via the median and the median absolute deviation (MAD); if FALSE, via\n#'   the mean and the standard deviation.\n#' @param ... 
Currently not used.\n#' @param verbose Not used anymore since `datawizard` 0.6.6.\n#'\n#' @examples\n#' hist(iris$Sepal.Length, main = \"Original data\")\n#'\n#' hist(winsorize(iris$Sepal.Length, threshold = 0.2),\n#'   xlim = c(4, 8), main = \"Percentile Winsorization\"\n#' )\n#'\n#' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = \"zscore\"),\n#'   xlim = c(4, 8), main = \"Mean (+/- SD) Winsorization\"\n#' )\n#'\n#' hist(winsorize(iris$Sepal.Length, threshold = 1.5, method = \"zscore\", robust = TRUE),\n#'   xlim = c(4, 8), main = \"Median (+/- MAD) Winsorization\"\n#' )\n#'\n#' hist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = \"raw\"),\n#'   xlim = c(4, 8), main = \"Raw Thresholds\"\n#' )\n#'\n#' # Also works on a data frame:\n#' winsorize(iris, threshold = 0.2)\n#'\n#' @inherit data_rename seealso\n#' @export\nwinsorize <- function(data, ...) {\n  UseMethod(\"winsorize\")\n}\n\n\n#' @export\nwinsorize.factor <- function(data, ...) {\n  data\n}\n\n#' @export\nwinsorize.character <- winsorize.factor\n\n#' @export\nwinsorize.logical <- winsorize.factor\n\n#' @export\nwinsorize.data.frame <- function(\n  data,\n  threshold = 0.2,\n  method = \"percentile\",\n  robust = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  data[] <- lapply(\n    data,\n    winsorize,\n    threshold = threshold,\n    method = method,\n    robust = robust,\n    verbose = verbose\n  )\n  data\n}\n\n#' @rdname winsorize\n#' @export\nwinsorize.numeric <- function(\n  data,\n  threshold = 0.2,\n  method = \"percentile\",\n  robust = FALSE,\n  verbose = TRUE,\n  ...\n) {\n  method <- match.arg(method, choices = c(\"percentile\", \"zscore\", \"raw\"))\n\n  if (method == \"raw\" && length(threshold) != 2L) {\n    insight::format_error(\n      \"`threshold` must be of length 2 for lower and upper bound.\"\n    )\n  }\n\n  if (method == \"percentile\") {\n    if (threshold < 0 || threshold > 0.5) {\n      insight::format_error(\n        \"`threshold` for winsorization must be a 
scalar between 0 and 0.5.\"\n      )\n    }\n\n    y <- sort(data)\n    n <- length(data)\n    ibot <- floor(threshold * n) + 1\n    itop <- length(data) - ibot + 1\n\n    threshold <- c(y[ibot], y[itop])\n  }\n\n  if (method == \"zscore\") {\n    if (threshold <= 0) {\n      insight::format_error(\n        \"'threshold' for winsorization must be a scalar greater than 0.\"\n      )\n    }\n\n    if (isTRUE(robust)) {\n      centeral <- stats::median(data, na.rm = TRUE)\n      deviation <- stats::mad(data, center = centeral, na.rm = TRUE)\n    } else {\n      centeral <- mean(data, na.rm = TRUE)\n      deviation <- stats::sd(data, na.rm = TRUE)\n    }\n\n    threshold <- centeral + c(-1, 1) * deviation * threshold\n  }\n\n  data[data < threshold[1]] <- threshold[1]\n  data[data > threshold[2]] <- threshold[2]\n  return(data)\n}\n"
  },
  {
    "path": "README.Rmd",
    "content": "---\noutput: github_document\n---\n\n# `datawizard`: Easy Data Wrangling and Statistical Transformations <img src='man/figures/logo.png' align=\"right\" height=\"139\" />\n\n```{r, echo=FALSE, warning=FALSE, message=FALSE}\nknitr::opts_chunk$set(\n  collapse = TRUE,\n  dpi = 300,\n  out.width = \"100%\",\n  fig.path = \"man/figures/\",\n  comment = \"#>\"\n)\n\nset.seed(333)\nlibrary(datawizard)\n```\n\n[![DOI](https://joss.theoj.org/papers/10.21105/joss.04684/status.svg)](https://doi.org/10.21105/joss.04684)\n[![downloads](https://cranlogs.r-pkg.org/badges/datawizard)](https://cran.r-project.org/package=datawizard)\n[![total](https://cranlogs.r-pkg.org/badges/grand-total/datawizard)](https://cranlogs.r-pkg.org/)\n\n<!-- ***:sparkles: Hockety pockety wockety wack, prepare this data forth and back*** -->\n\n<!-- ***Hockety pockety wockety wock, messy data is in shock*** -->\n\n<!-- ***Hockety pockety wockety woss, you can cite i-it from JOSS*** <sup>(soon)</sup> -->\n\n<!-- ***Hockety pockety wockety wass, datawizard saves your ass! :sparkles:*** -->\n\n`{datawizard}` is a lightweight package to easily manipulate, clean, transform, and prepare your data for analysis. It is part of the [easystats ecosystem](https://easystats.github.io/easystats/), a suite of R packages to deal with your entire statistical analysis, from cleaning the data to reporting the results.\n\nIt covers two aspects of data preparation:\n\n- **Data manipulation**: `{datawizard}` offers a very similar set of functions to that of the *tidyverse* packages, such as `{dplyr}` and `{tidyr}`, to select, filter and reshape data, with a few key differences. 1) All data manipulation functions start with the prefix `data_*` (which makes them easy to identify). 2) Although most functions can be used exactly as their *tidyverse* equivalents, they are also string-friendly (which makes them easy to program with and use inside functions). 
Finally, `{datawizard}` is super lightweight (no dependencies, similar to [poorman](https://github.com/nathaneastwood/poorman)), which makes it awesome for developers to use in their packages.\n\n- **Statistical transformations**: `{datawizard}` also has powerful functions to easily apply common data [transformations](https://easystats.github.io/datawizard/reference/index.html#statistical-transformations), including standardization, normalization, rescaling, rank-transformation, scale reversing, recoding, binning, etc.\n\n\n\n</br>\n\n<img src='https://media.giphy.com/media/VcizxCUIgaKpa/giphy.gif' width=\"300\"/>\n\n</br>\n\n# Installation\n\n[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/datawizard)](https://cran.r-project.org/package=datawizard) [![datawizard status badge](https://easystats.r-universe.dev/badges/datawizard)](https://easystats.r-universe.dev) [![codecov](https://codecov.io/gh/easystats/datawizard/branch/main/graph/badge.svg)](https://app.codecov.io/gh/easystats/datawizard) [![R-CMD-check](https://github.com/easystats/datawizard/workflows/R-CMD-check/badge.svg?branch=main)](https://github.com/easystats/datawizard/actions)\n\nType | Source | Command\n---|---|---\nRelease | CRAN | `install.packages(\"datawizard\")`\nDevelopment | r-universe | `install.packages(\"datawizard\", repos = \"https://easystats.r-universe.dev\")`\nDevelopment | GitHub | `remotes::install_github(\"easystats/datawizard\")`\n\n> **Tip**\n>\n> **Instead of `library(datawizard)`, use `library(easystats)`.**\n> **This will make all features of the  easystats-ecosystem available.**\n>\n> **To stay updated, use `easystats::install_latest()`.**\n\n# Citation\n\nTo cite the package, run the following command:\n\n```{r, comment=\"\"}\ncitation(\"datawizard\")\n```\n\n# 
Features\n\n[![Documentation](https://img.shields.io/badge/documentation-datawizard-orange.svg?colorB=E91E63)](https://easystats.github.io/datawizard/)\n[![Blog](https://img.shields.io/badge/blog-easystats-orange.svg?colorB=FF9800)](https://easystats.github.io/blog/posts/)\n[![Features](https://img.shields.io/badge/features-datawizard-orange.svg?colorB=2196F3)](https://easystats.github.io/datawizard/reference/index.html)\n\nMost courses and tutorials about statistical modeling assume that you are working with a clean and tidy dataset. In practice, however, a major part of doing statistical modeling is preparing your data--cleaning up values, creating new columns, reshaping the dataset, or transforming some variables. `{datawizard}` provides easy to use tools to perform these common, critical, and sometimes tedious data preparation tasks.\n\n## Data wrangling\n\n### Select, filter and remove variables\n\nThe package provides helpers to filter rows meeting certain conditions...\n\n```{r}\ndata_match(mtcars, data.frame(vs = 0, am = 1))\n```\n\n... 
or logical expressions:\n\n```{r}\ndata_filter(mtcars, vs == 0 & am == 1)\n```\n\nFinding columns in a data frame, or retrieving the data of selected columns, can be  achieved using `extract_column_names()` or `data_select()`:\n\n```{r}\n# find column names matching a pattern\nextract_column_names(iris, starts_with(\"Sepal\"))\n\n# return data columns matching a pattern\ndata_select(iris, starts_with(\"Sepal\")) |> head()\n```\n\nIt is also possible to extract one or more variables:\n\n```{r}\n# single variable\ndata_extract(mtcars, \"gear\")\n\n# more variables\nhead(data_extract(iris, ends_with(\"Width\")))\n```\n\nDue to the consistent API, removing variables is just as simple:\n\n```{r}\nhead(data_remove(iris, starts_with(\"Sepal\")))\n```\n\n### Reorder or rename\n\n```{r}\nhead(data_relocate(iris, select = \"Species\", before = \"Sepal.Length\"))\n```\n\n```{r}\nhead(data_rename(iris, c(\"Sepal.Length\", \"Sepal.Width\"), c(\"length\", \"width\")))\n```\n\n### Merge\n\n```{r}\nx <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 5:7, id = 1:3)\ny <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 2:4)\n\nx\ny\n\ndata_merge(x, y, join = \"full\")\n\ndata_merge(x, y, join = \"left\")\n\ndata_merge(x, y, join = \"right\")\n\ndata_merge(x, y, join = \"semi\", by = \"c\")\n\ndata_merge(x, y, join = \"anti\", by = \"c\")\n\ndata_merge(x, y, join = \"inner\")\n\ndata_merge(x, y, join = \"bind\")\n```\n\n### Reshape\n\nA common data wrangling task is to reshape data.\n\nEither to go from wide/Cartesian to long/tidy format\n\n```{r}\nwide_data <- data.frame(replicate(5, rnorm(10)))\n\nhead(data_to_long(wide_data))\n```\n\nor the other way\n\n```{r}\nlong_data <- data_to_long(wide_data, rows_to = \"Row_ID\") # Save row number\n\ndata_to_wide(long_data,\n  names_from = \"name\",\n  values_from = \"value\",\n  id_cols = \"Row_ID\"\n)\n```\n\n### Empty rows and columns\n\n```{r}\ntmp <- data.frame(\n  a = c(1, 2, 3, NA, 5),\n  b = c(1, NA, 3, NA, 
5),\n  c = c(NA, NA, NA, NA, NA),\n  d = c(1, NA, 3, NA, 5)\n)\n\ntmp\n\n# indices of empty columns or rows\nempty_columns(tmp)\nempty_rows(tmp)\n\n# remove empty columns or rows\nremove_empty_columns(tmp)\nremove_empty_rows(tmp)\n\n# remove empty columns and rows\nremove_empty(tmp)\n```\n\n### Recode or cut dataframe\n\n```{r}\nset.seed(123)\nx <- sample(1:10, size = 50, replace = TRUE)\n\ntable(x)\n\n# cut into 3 groups, based on distribution (quantiles)\ntable(categorize(x, split = \"quantile\", n_groups = 3))\n```\n\n## Data Transformations\n\nThe packages also contains multiple functions to help transform data.\n\n### Standardize\n\nFor example, to standardize (*z*-score) data:\n\n```{r}\n# before\nsummary(swiss)\n\n# after\nsummary(standardize(swiss))\n```\n\n### Winsorize\n\nTo winsorize data:\n\n```{r}\n# before\nanscombe\n\n# after\nwinsorize(anscombe)\n```\n\n### Center\n\nTo grand-mean center data\n\n```{r}\ncenter(anscombe)\n```\n\n### Ranktransform\n\nTo rank-transform data:\n\n```{r}\n# before\nhead(trees)\n\n# after\nhead(ranktransform(trees))\n```\n\n### Rescale\n\nTo rescale a numeric variable to a new range:\n\n```{r}\nchange_scale(c(0, 1, 5, -5, -2))\n```\n\n### Rotate or transpose\n\n```{r}\nx <- mtcars[1:3, 1:4]\n\nx\n\ndata_rotate(x)\n```\n\n\n## Data properties\n\n`datawizard` provides a way to provide comprehensive descriptive summary for all variables in a dataframe:\n\n```{r}\ndata(iris)\ndescribe_distribution(iris)\n```\n\nOr even just a variable\n\n```{r}\ndescribe_distribution(mtcars$wt)\n```\n\nThere are also some additional data properties that can be computed using this package.\n\n```{r}\nx <- (-10:10)^3 + rnorm(21, 0, 100)\nsmoothness(x, method = \"diff\")\n```\n\n## Function design and pipe-workflow\n\nThe design of the `{datawizard}` functions follows a design principle that makes it easy for user to understand and remember how functions work:\n\n1. the first argument is the data\n2. 
for methods that work on data frames, two arguments are following to `select` and `exclude` variables\n3. the following arguments are arguments related to the specific tasks of the functions\n\nMost important, functions that accept data frames usually have this as their first argument, and also return a (modified) data frame again. Thus, `{datawizard}` integrates smoothly into a \"pipe-workflow\".\n\n```{r}\niris |>\n  # all rows where Species is \"versicolor\" or \"virginica\"\n  data_filter(Species %in% c(\"versicolor\", \"virginica\")) |>\n  # select only columns with \".\" in names (i.e. drop Species)\n  data_select(contains(\"\\\\.\")) |>\n  # move columns that ends with \"Length\" to start of data frame\n  data_relocate(ends_with(\"Length\")) |>\n  # remove fourth column\n  data_remove(4) |>\n  head()\n```\n\n# Contributing and Support\n\nIn case you want to file an issue or contribute in another way to the package, please follow [this guide](https://easystats.github.io/datawizard/CONTRIBUTING.html). For questions about the functionality, you may either contact us via email or also file an issue.\n\n# Code of Conduct\n\nPlease note that this project is released with a\n[Contributor Code of Conduct](https://easystats.github.io/datawizard/CODE_OF_CONDUCT.html). By participating in this project you agree to abide by its terms.\n"
  },
  {
    "path": "README.md",
    "content": "\n# `datawizard`: Easy Data Wrangling and Statistical Transformations <img src='man/figures/logo.png' align=\"right\" height=\"139\" />\n\n[![DOI](https://joss.theoj.org/papers/10.21105/joss.04684/status.svg)](https://doi.org/10.21105/joss.04684)\n[![downloads](https://cranlogs.r-pkg.org/badges/datawizard)](https://cran.r-project.org/package=datawizard)\n[![total](https://cranlogs.r-pkg.org/badges/grand-total/datawizard)](https://cranlogs.r-pkg.org/)\n\n<!-- ***:sparkles: Hockety pockety wockety wack, prepare this data forth and back*** -->\n\n<!-- ***Hockety pockety wockety wock, messy data is in shock*** -->\n\n<!-- ***Hockety pockety wockety woss, you can cite i-it from JOSS*** <sup>(soon)</sup> -->\n\n<!-- ***Hockety pockety wockety wass, datawizard saves your ass! :sparkles:*** -->\n\n`{datawizard}` is a lightweight package to easily manipulate, clean,\ntransform, and prepare your data for analysis. It is part of the\n[easystats ecosystem](https://easystats.github.io/easystats/), a suite\nof R packages to deal with your entire statistical analysis, from\ncleaning the data to reporting the results.\n\nIt covers two aspects of data preparation:\n\n- **Data manipulation**: `{datawizard}` offers a very similar set of\n  functions to that of the *tidyverse* packages, such as a `{dplyr}` and\n  `{tidyr}`, to select, filter and reshape data, with a few key\n  differences. 1) All data manipulation functions start with the prefix\n  `data_*` (which makes them easy to identify). 2) Although most\n  functions can be used exactly as their *tidyverse* equivalents, they\n  are also string-friendly (which makes them easy to program with and\n  use inside functions). 
Finally, `{datawizard}` is super lightweight\n  (no dependencies, similar to\n  [poorman](https://github.com/nathaneastwood/poorman)), which makes it\n  awesome for developers to use in their packages.\n\n- **Statistical transformations**: `{datawizard}` also has powerful\n  functions to easily apply common data\n  [transformations](https://easystats.github.io/datawizard/reference/index.html#statistical-transformations),\n  including standardization, normalization, rescaling,\n  rank-transformation, scale reversing, recoding, binning, etc.\n\n</br>\n\n<img src='https://media.giphy.com/media/VcizxCUIgaKpa/giphy.gif' width=\"300\"/>\n\n</br>\n\n# Installation\n\n[![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/datawizard)](https://cran.r-project.org/package=datawizard)\n[![datawizard status\nbadge](https://easystats.r-universe.dev/badges/datawizard)](https://easystats.r-universe.dev)\n[![codecov](https://codecov.io/gh/easystats/datawizard/branch/main/graph/badge.svg)](https://app.codecov.io/gh/easystats/datawizard)\n[![R-CMD-check](https://github.com/easystats/datawizard/workflows/R-CMD-check/badge.svg?branch=main)](https://github.com/easystats/datawizard/actions)\n\n| Type | Source | Command |\n|----|----|----|\n| Release | CRAN | `install.packages(\"datawizard\")` |\n| Development | r-universe | `install.packages(\"datawizard\", repos = \"https://easystats.r-universe.dev\")` |\n| Development | GitHub | `remotes::install_github(\"easystats/datawizard\")` |\n\n> **Tip**\n>\n> **Instead of `library(datawizard)`, use `library(easystats)`.** **This\n> will make all features of the easystats-ecosystem available.**\n>\n> **To stay updated, use `easystats::install_latest()`.**\n\n# Citation\n\nTo cite the package, run the following command:\n\n``` r\ncitation(\"datawizard\")\nTo cite package 'datawizard' in publications use:\n\n  Patil et al., (2022). datawizard: An R Package for Easy Data\n  Preparation and Statistical Transformations. 
Journal of Open Source\n  Software, 7(78), 4684, https://doi.org/10.21105/joss.04684\n\nA BibTeX entry for LaTeX users is\n\n  @Article{,\n    title = {{datawizard}: An {R} Package for Easy Data Preparation and Statistical Transformations},\n    author = {Indrajeet Patil and Dominique Makowski and Mattan S. Ben-Shachar and Brenton M. Wiernik and Etienne Bacher and Daniel Lüdecke},\n    journal = {Journal of Open Source Software},\n    year = {2022},\n    volume = {7},\n    number = {78},\n    pages = {4684},\n    doi = {10.21105/joss.04684},\n  }\n```\n\n# Features\n\n[![Documentation](https://img.shields.io/badge/documentation-datawizard-orange.svg?colorB=E91E63)](https://easystats.github.io/datawizard/)\n[![Blog](https://img.shields.io/badge/blog-easystats-orange.svg?colorB=FF9800)](https://easystats.github.io/blog/posts/)\n[![Features](https://img.shields.io/badge/features-datawizard-orange.svg?colorB=2196F3)](https://easystats.github.io/datawizard/reference/index.html)\n\nMost courses and tutorials about statistical modeling assume that you\nare working with a clean and tidy dataset. In practice, however, a major\npart of doing statistical modeling is preparing your data–cleaning up\nvalues, creating new columns, reshaping the dataset, or transforming\nsome variables. 
`{datawizard}` provides easy to use tools to perform\nthese common, critical, and sometimes tedious data preparation tasks.\n\n## Data wrangling\n\n### Select, filter and remove variables\n\nThe package provides helpers to filter rows meeting certain conditions…\n\n``` r\ndata_match(mtcars, data.frame(vs = 0, am = 1))\n#>                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb\n#> Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4\n#> Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4\n#> Porsche 914-2  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2\n#> Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4\n#> Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6\n#> Maserati Bora  15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8\n```\n\n… or logical expressions:\n\n``` r\ndata_filter(mtcars, vs == 0 & am == 1)\n#>                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb\n#> Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4\n#> Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4\n#> Porsche 914-2  26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2\n#> Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4\n#> Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6\n#> Maserati Bora  15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8\n```\n\nFinding columns in a data frame, or retrieving the data of selected\ncolumns, can be achieved using `extract_column_names()` or\n`data_select()`:\n\n``` r\n# find column names matching a pattern\nextract_column_names(iris, starts_with(\"Sepal\"))\n#> [1] \"Sepal.Length\" \"Sepal.Width\"\n\n# return data columns matching a pattern\ndata_select(iris, starts_with(\"Sepal\")) |> head()\n#>   Sepal.Length Sepal.Width\n#> 1          5.1         3.5\n#> 2          4.9         3.0\n#> 3          4.7         3.2\n#> 4          4.6         3.1\n#> 5          5.0         3.6\n#> 6          5.4      
   3.9\n```\n\nIt is also possible to extract one or more variables:\n\n``` r\n# single variable\ndata_extract(mtcars, \"gear\")\n#>  [1] 4 4 4 3 3 3 3 4 4 4 4 3 3 3 3 3 3 4 4 4 3 3 3 3 3 4 5 5 5 5 5 4\n\n# more variables\nhead(data_extract(iris, ends_with(\"Width\")))\n#>   Sepal.Width Petal.Width\n#> 1         3.5         0.2\n#> 2         3.0         0.2\n#> 3         3.2         0.2\n#> 4         3.1         0.2\n#> 5         3.6         0.2\n#> 6         3.9         0.4\n```\n\nDue to the consistent API, removing variables is just as simple:\n\n``` r\nhead(data_remove(iris, starts_with(\"Sepal\")))\n#>   Petal.Length Petal.Width Species\n#> 1          1.4         0.2  setosa\n#> 2          1.4         0.2  setosa\n#> 3          1.3         0.2  setosa\n#> 4          1.5         0.2  setosa\n#> 5          1.4         0.2  setosa\n#> 6          1.7         0.4  setosa\n```\n\n### Reorder or rename\n\n``` r\nhead(data_relocate(iris, select = \"Species\", before = \"Sepal.Length\"))\n#>   Species Sepal.Length Sepal.Width Petal.Length Petal.Width\n#> 1  setosa          5.1         3.5          1.4         0.2\n#> 2  setosa          4.9         3.0          1.4         0.2\n#> 3  setosa          4.7         3.2          1.3         0.2\n#> 4  setosa          4.6         3.1          1.5         0.2\n#> 5  setosa          5.0         3.6          1.4         0.2\n#> 6  setosa          5.4         3.9          1.7         0.4\n```\n\n``` r\nhead(data_rename(iris, c(\"Sepal.Length\", \"Sepal.Width\"), c(\"length\", \"width\")))\n#>   length width Petal.Length Petal.Width Species\n#> 1    5.1   3.5          1.4         0.2  setosa\n#> 2    4.9   3.0          1.4         0.2  setosa\n#> 3    4.7   3.2          1.3         0.2  setosa\n#> 4    4.6   3.1          1.5         0.2  setosa\n#> 5    5.0   3.6          1.4         0.2  setosa\n#> 6    5.4   3.9          1.7         0.4  setosa\n```\n\n### Merge\n\n``` r\nx <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 
5:7, id = 1:3)\ny <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 2:4)\n\nx\n#>   a b c id\n#> 1 1 a 5  1\n#> 2 2 b 6  2\n#> 3 3 c 7  3\ny\n#>   c d   e id\n#> 1 6 f 100  2\n#> 2 7 g 101  3\n#> 3 8 h 102  4\n\ndata_merge(x, y, join = \"full\")\n#>    a    b c id    d   e\n#> 3  1    a 5  1 <NA>  NA\n#> 1  2    b 6  2    f 100\n#> 2  3    c 7  3    g 101\n#> 4 NA <NA> 8  4    h 102\n\ndata_merge(x, y, join = \"left\")\n#>   a b c id    d   e\n#> 3 1 a 5  1 <NA>  NA\n#> 1 2 b 6  2    f 100\n#> 2 3 c 7  3    g 101\n\ndata_merge(x, y, join = \"right\")\n#>    a    b c id d   e\n#> 1  2    b 6  2 f 100\n#> 2  3    c 7  3 g 101\n#> 3 NA <NA> 8  4 h 102\n\ndata_merge(x, y, join = \"semi\", by = \"c\")\n#>   a b c id\n#> 2 2 b 6  2\n#> 3 3 c 7  3\n\ndata_merge(x, y, join = \"anti\", by = \"c\")\n#>   a b c id\n#> 1 1 a 5  1\n\ndata_merge(x, y, join = \"inner\")\n#>   a b c id d   e\n#> 1 2 b 6  2 f 100\n#> 2 3 c 7  3 g 101\n\ndata_merge(x, y, join = \"bind\")\n#>    a    b c id    d   e\n#> 1  1    a 5  1 <NA>  NA\n#> 2  2    b 6  2 <NA>  NA\n#> 3  3    c 7  3 <NA>  NA\n#> 4 NA <NA> 6  2    f 100\n#> 5 NA <NA> 7  3    g 101\n#> 6 NA <NA> 8  4    h 102\n```\n\n### Reshape\n\nA common data wrangling task is to reshape data.\n\nEither to go from wide/Cartesian to long/tidy format\n\n``` r\nwide_data <- data.frame(replicate(5, rnorm(10)))\n\nhead(data_to_long(wide_data))\n#>   name       value\n#> 1   X1 -0.08281164\n#> 2   X2 -1.12490028\n#> 3   X3 -0.70632036\n#> 4   X4 -0.70278946\n#> 5   X5  0.07633326\n#> 6   X1  1.93468099\n```\n\nor the other way\n\n``` r\nlong_data <- data_to_long(wide_data, rows_to = \"Row_ID\") # Save row number\n\ndata_to_wide(long_data,\n  names_from = \"name\",\n  values_from = \"value\",\n  id_cols = \"Row_ID\"\n)\n#>    Row_ID          X1          X2          X3         X4          X5\n#> 1       1 -0.08281164 -1.12490028 -0.70632036 -0.7027895  0.07633326\n#> 2       2  1.93468099 -0.87430362  0.96687656  0.2998642 
-0.23035595\n#> 3       3 -2.05128979  0.04386162 -0.71016648  1.1494697  0.31746484\n#> 4       4  0.27773897 -0.58397514 -0.05917365 -0.3016415 -1.59268440\n#> 5       5 -1.52596060 -0.82329858 -0.23094342 -0.5473394 -0.18194062\n#> 6       6 -0.26916362  0.11059280  0.69200045 -0.3854041  1.75614174\n#> 7       7  1.23305388  0.36472778  1.35682290  0.2763720  0.11394932\n#> 8       8  0.63360774  0.05370100  1.78872284  0.1518608 -0.29216508\n#> 9       9  0.35271746  1.36867235  0.41071582 -0.4313808  1.75409316\n#> 10     10 -0.56048248 -0.38045724 -2.18785470 -1.8705001  1.80958455\n```\n\n### Empty rows and columns\n\n``` r\ntmp <- data.frame(\n  a = c(1, 2, 3, NA, 5),\n  b = c(1, NA, 3, NA, 5),\n  c = c(NA, NA, NA, NA, NA),\n  d = c(1, NA, 3, NA, 5)\n)\n\ntmp\n#>    a  b  c  d\n#> 1  1  1 NA  1\n#> 2  2 NA NA NA\n#> 3  3  3 NA  3\n#> 4 NA NA NA NA\n#> 5  5  5 NA  5\n\n# indices of empty columns or rows\nempty_columns(tmp)\n#> c \n#> 3\nempty_rows(tmp)\n#> [1] 4\n\n# remove empty columns or rows\nremove_empty_columns(tmp)\n#>    a  b  d\n#> 1  1  1  1\n#> 2  2 NA NA\n#> 3  3  3  3\n#> 4 NA NA NA\n#> 5  5  5  5\nremove_empty_rows(tmp)\n#>   a  b  c  d\n#> 1 1  1 NA  1\n#> 2 2 NA NA NA\n#> 3 3  3 NA  3\n#> 5 5  5 NA  5\n\n# remove empty columns and rows\nremove_empty(tmp)\n#>   a  b  d\n#> 1 1  1  1\n#> 2 2 NA NA\n#> 3 3  3  3\n#> 5 5  5  5\n```\n\n### Recode or cut dataframe\n\n``` r\nset.seed(123)\nx <- sample(1:10, size = 50, replace = TRUE)\n\ntable(x)\n#> x\n#>  1  2  3  4  5  6  7  8  9 10 \n#>  2  3  5  3  7  5  5  2 11  7\n\n# cut into 3 groups, based on distribution (quantiles)\ntable(categorize(x, split = \"quantile\", n_groups = 3))\n#> \n#>  1  2  3 \n#> 13 19 18\n```\n\n## Data Transformations\n\nThe packages also contains multiple functions to help transform data.\n\n### Standardize\n\nFor example, to standardize (*z*-score) data:\n\n``` r\n# before\nsummary(swiss)\n#>    Fertility      Agriculture     Examination      Education    \n#>  Min.   
:35.00   Min.   : 1.20   Min.   : 3.00   Min.   : 1.00  \n#>  1st Qu.:64.70   1st Qu.:35.90   1st Qu.:12.00   1st Qu.: 6.00  \n#>  Median :70.40   Median :54.10   Median :16.00   Median : 8.00  \n#>  Mean   :70.14   Mean   :50.66   Mean   :16.49   Mean   :10.98  \n#>  3rd Qu.:78.45   3rd Qu.:67.65   3rd Qu.:22.00   3rd Qu.:12.00  \n#>  Max.   :92.50   Max.   :89.70   Max.   :37.00   Max.   :53.00  \n#>     Catholic       Infant.Mortality\n#>  Min.   :  2.150   Min.   :10.80   \n#>  1st Qu.:  5.195   1st Qu.:18.15   \n#>  Median : 15.140   Median :20.00   \n#>  Mean   : 41.144   Mean   :19.94   \n#>  3rd Qu.: 93.125   3rd Qu.:21.70   \n#>  Max.   :100.000   Max.   :26.60\n\n# after\nsummary(standardize(swiss))\n#>    Fertility         Agriculture       Examination         Education      \n#>  Min.   :-2.81327   Min.   :-2.1778   Min.   :-1.69084   Min.   :-1.0378  \n#>  1st Qu.:-0.43569   1st Qu.:-0.6499   1st Qu.:-0.56273   1st Qu.:-0.5178  \n#>  Median : 0.02061   Median : 0.1515   Median :-0.06134   Median :-0.3098  \n#>  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000  \n#>  3rd Qu.: 0.66504   3rd Qu.: 0.7481   3rd Qu.: 0.69074   3rd Qu.: 0.1062  \n#>  Max.   : 1.78978   Max.   : 1.7190   Max.   : 2.57094   Max.   : 4.3702  \n#>     Catholic       Infant.Mortality  \n#>  Min.   :-0.9350   Min.   :-3.13886  \n#>  1st Qu.:-0.8620   1st Qu.:-0.61543  \n#>  Median :-0.6235   Median : 0.01972  \n#>  Mean   : 0.0000   Mean   : 0.00000  \n#>  3rd Qu.: 1.2464   3rd Qu.: 0.60337  \n#>  Max.   : 1.4113   Max.   
: 2.28566\n```\n\n### Winsorize\n\nTo winsorize data:\n\n``` r\n# before\nanscombe\n#>    x1 x2 x3 x4    y1   y2    y3    y4\n#> 1  10 10 10  8  8.04 9.14  7.46  6.58\n#> 2   8  8  8  8  6.95 8.14  6.77  5.76\n#> 3  13 13 13  8  7.58 8.74 12.74  7.71\n#> 4   9  9  9  8  8.81 8.77  7.11  8.84\n#> 5  11 11 11  8  8.33 9.26  7.81  8.47\n#> 6  14 14 14  8  9.96 8.10  8.84  7.04\n#> 7   6  6  6  8  7.24 6.13  6.08  5.25\n#> 8   4  4  4 19  4.26 3.10  5.39 12.50\n#> 9  12 12 12  8 10.84 9.13  8.15  5.56\n#> 10  7  7  7  8  4.82 7.26  6.42  7.91\n#> 11  5  5  5  8  5.68 4.74  5.73  6.89\n\n# after\nwinsorize(anscombe)\n#>    x1 x2 x3 x4   y1   y2   y3   y4\n#> 1  10 10 10  8 8.04 9.13 7.46 6.58\n#> 2   8  8  8  8 6.95 8.14 6.77 5.76\n#> 3  12 12 12  8 7.58 8.74 8.15 7.71\n#> 4   9  9  9  8 8.81 8.77 7.11 8.47\n#> 5  11 11 11  8 8.33 9.13 7.81 8.47\n#> 6  12 12 12  8 8.81 8.10 8.15 7.04\n#> 7   6  6  6  8 7.24 6.13 6.08 5.76\n#> 8   6  6  6  8 5.68 6.13 6.08 8.47\n#> 9  12 12 12  8 8.81 9.13 8.15 5.76\n#> 10  7  7  7  8 5.68 7.26 6.42 7.91\n#> 11  6  6  6  8 5.68 6.13 6.08 6.89\n```\n\n### Center\n\nTo grand-mean center data\n\n``` r\ncenter(anscombe)\n#>    x1 x2 x3 x4          y1         y2    y3         y4\n#> 1   1  1  1 -1  0.53909091  1.6390909 -0.04 -0.9209091\n#> 2  -1 -1 -1 -1 -0.55090909  0.6390909 -0.73 -1.7409091\n#> 3   4  4  4 -1  0.07909091  1.2390909  5.24  0.2090909\n#> 4   0  0  0 -1  1.30909091  1.2690909 -0.39  1.3390909\n#> 5   2  2  2 -1  0.82909091  1.7590909  0.31  0.9690909\n#> 6   5  5  5 -1  2.45909091  0.5990909  1.34 -0.4609091\n#> 7  -3 -3 -3 -1 -0.26090909 -1.3709091 -1.42 -2.2509091\n#> 8  -5 -5 -5 10 -3.24090909 -4.4009091 -2.11  4.9990909\n#> 9   3  3  3 -1  3.33909091  1.6290909  0.65 -1.9409091\n#> 10 -2 -2 -2 -1 -2.68090909 -0.2409091 -1.08  0.4090909\n#> 11 -4 -4 -4 -1 -1.82090909 -2.7609091 -1.77 -0.6109091\n```\n\n### Ranktransform\n\nTo rank-transform data:\n\n``` r\n# before\nhead(trees)\n#>   Girth Height Volume\n#> 1   8.3     70 
  10.3\n#> 2   8.6     65   10.3\n#> 3   8.8     63   10.2\n#> 4  10.5     72   16.4\n#> 5  10.7     81   18.8\n#> 6  10.8     83   19.7\n\n# after\nhead(ranktransform(trees))\n#>   Girth Height Volume\n#> 1     1    6.0    2.5\n#> 2     2    3.0    2.5\n#> 3     3    1.0    1.0\n#> 4     4    8.5    5.0\n#> 5     5   25.5    7.0\n#> 6     6   28.0    9.0\n```\n\n### Rescale\n\nTo rescale a numeric variable to a new range:\n\n``` r\nchange_scale(c(0, 1, 5, -5, -2))\n#> [1]  50  60 100   0  30\n#> (original range = -5 to 5)\n```\n\n### Rotate or transpose\n\n``` r\nx <- mtcars[1:3, 1:4]\n\nx\n#>                mpg cyl disp  hp\n#> Mazda RX4     21.0   6  160 110\n#> Mazda RX4 Wag 21.0   6  160 110\n#> Datsun 710    22.8   4  108  93\n\ndata_rotate(x)\n#>      Mazda RX4 Mazda RX4 Wag Datsun 710\n#> mpg         21            21       22.8\n#> cyl          6             6        4.0\n#> disp       160           160      108.0\n#> hp         110           110       93.0\n```\n\n## Data properties\n\n`datawizard` provides a way to provide comprehensive descriptive summary\nfor all variables in a dataframe:\n\n``` r\ndata(iris)\ndescribe_distribution(iris)\n#> Variable     | Mean |   SD |  IQR |        Range | Skewness | Kurtosis |   n | n_Missing\n#> ----------------------------------------------------------------------------------------\n#> Sepal.Length | 5.84 | 0.83 | 1.30 | [4.30, 7.90] |     0.31 |    -0.55 | 150 |         0\n#> Sepal.Width  | 3.06 | 0.44 | 0.52 | [2.00, 4.40] |     0.32 |     0.23 | 150 |         0\n#> Petal.Length | 3.76 | 1.77 | 3.52 | [1.00, 6.90] |    -0.27 |    -1.40 | 150 |         0\n#> Petal.Width  | 1.20 | 0.76 | 1.50 | [0.10, 2.50] |    -0.10 |    -1.34 | 150 |         0\n```\n\nOr even just a variable\n\n``` r\ndescribe_distribution(mtcars$wt)\n#> Mean |   SD |  IQR |        Range | Skewness | Kurtosis |  n | n_Missing\n#> ------------------------------------------------------------------------\n#> 3.22 | 0.98 | 1.19 | [1.51, 5.42] |     
0.47 |     0.42 | 32 |         0\n```\n\nThere are also some additional data properties that can be computed\nusing this package.\n\n``` r\nx <- (-10:10)^3 + rnorm(21, 0, 100)\nsmoothness(x, method = \"diff\")\n#> [1] 1.791243\n#> attr(,\"class\")\n#> [1] \"parameters_smoothness\" \"numeric\"\n```\n\n## Function design and pipe-workflow\n\nThe design of the `{datawizard}` functions follows a design principle\nthat makes it easy for user to understand and remember how functions\nwork:\n\n1.  the first argument is the data\n2.  for methods that work on data frames, two arguments are following to\n    `select` and `exclude` variables\n3.  the following arguments are arguments related to the specific tasks\n    of the functions\n\nMost important, functions that accept data frames usually have this as\ntheir first argument, and also return a (modified) data frame again.\nThus, `{datawizard}` integrates smoothly into a “pipe-workflow”.\n\n``` r\niris |>\n  # all rows where Species is \"versicolor\" or \"virginica\"\n  data_filter(Species %in% c(\"versicolor\", \"virginica\")) |>\n  # select only columns with \".\" in names (i.e. drop Species)\n  data_select(contains(\"\\\\.\")) |>\n  # move columns that ends with \"Length\" to start of data frame\n  data_relocate(ends_with(\"Length\")) |>\n  # remove fourth column\n  data_remove(4) |>\n  head()\n#>    Sepal.Length Petal.Length Sepal.Width\n#> 51          7.0          4.7         3.2\n#> 52          6.4          4.5         3.2\n#> 53          6.9          4.9         3.1\n#> 54          5.5          4.0         2.3\n#> 55          6.5          4.6         2.8\n#> 56          5.7          4.5         2.8\n```\n\n# Contributing and Support\n\nIn case you want to file an issue or contribute in another way to the\npackage, please follow [this\nguide](https://easystats.github.io/datawizard/CONTRIBUTING.html). 
For\nquestions about the functionality, you may either contact us via email\nor also file an issue.\n\n# Code of Conduct\n\nPlease note that this project is released with a [Contributor Code of\nConduct](https://easystats.github.io/datawizard/CODE_OF_CONDUCT.html).\nBy participating in this project you agree to abide by its terms.\n"
  },
  {
    "path": "air.toml",
    "content": "[format]\nline-width = 80\nindent-width = 2\nindent-style = \"space\"\nline-ending = \"lf\"\npersistent-line-breaks = true\nskip = [\"tribble\"]\n"
  },
  {
    "path": "cran-comments.md",
    "content": "This fixes R-devel errors reported on 2026-04-23.\n"
  },
  {
    "path": "datawizard.Rproj",
    "content": "Version: 1.0\n\nRestoreWorkspace: No\nSaveWorkspace: No\nAlwaysSaveHistory: No\n\nEnableCodeIndexing: Yes\nUseSpacesForTab: Yes\nNumSpacesForTab: 2\nEncoding: UTF-8\n\nRnwWeave: knitr\nLaTeX: XeLaTeX\n\nAutoAppendNewline: Yes\nStripTrailingWhitespace: Yes\n\nBuildType: Package\nPackageUseDevtools: Yes\nPackageInstallArgs: --no-multiarch --with-keep.source\nPackageRoxygenize: rd,collate,namespace\n\nQuitChildProcessesOnExit: Yes\nDisableExecuteRprofile: Yes\n"
  },
  {
    "path": "datawizard.code-workspace",
    "content": "{\n\t\"folders\": [\n\t\t{\n\t\t\t\"path\": \".\"\n\t\t}\n  ],\n  \"launch\": {\n    \"version\": \"0.2.0\",\n    \"configurations\": [\n      {\n        \"type\": \"R-Debugger\",\n        \"name\": \"Launch R-Workspace\",\n        \"request\": \"launch\",\n        \"debugMode\": \"workspace\",\n        \"workingDirectory\": \"\"\n      }\n    ]\n  }\n}\n"
  },
  {
    "path": "inst/CITATION",
    "content": "bibentry(\n  bibtype=\"Article\",\n  title=\"{datawizard}: An {R} Package for Easy Data Preparation and Statistical Transformations\",\n  author=c(person(\"Indrajeet\", \"Patil\"), person(\"Dominique\", \"Makowski\"), person(\"Mattan S.\", \"Ben-Shachar\"), person(\"Brenton M.\", \"Wiernik\"), person(\"Etienne\", \"Bacher\"), person(\"Daniel\", \"Lüdecke\")),\n  journal=\"Journal of Open Source Software\",\n  year = 2022,\n  volume = 7,\n  number = 78,\n  pages = 4684,\n  doi = \"10.21105/joss.04684\",\n  textVersion = \"Patil et al., (2022). datawizard: An R Package for Easy Data Preparation and Statistical Transformations. Journal of Open Source Software, 7(78), 4684, https://doi.org/10.21105/joss.04684\"\n)\n"
  },
  {
    "path": "inst/WORDLIST",
    "content": "AES\nAnalysing\nAsparouhov\nBMC\nBafumi\nBrincks\nBulotsky\nCMD\nCarle\nCatran\nCrosstables\nDEPRECATIONS\nDe\nDhaliwal\nDisaggregating\nEFC\nEUROFAMCARE\nEnders\nFairbrother\nGCM\nGLMM\nGelman\nGiesecke\nGiesselmann\nGuo\nHeisig\nHerrington\nHoffmann\nJoanes\nKish\nLlabre\nLumley\nMADs\nMattan\nMinitab\nORCID\nOpenBLAS\nPSU\nRanktransform\nRoutledge\nSDs\nSchaeffer\nShachar\nStata\nTidyverse\nTitleCase\nVerkuilen\nVerkuilen's\nWinsorize\nWinsorized\nWinsorizing\nal\nbehaviour\nbehaviours\nbrms\ncodebook\ncodebooks\ncodecov\ncrosstable\ncrosstables\ncsv\nde\ndecrypt\ndecrypted\ndoi\neasystats\neffectsize\nendogeneity\net\ngeoms\nggplot's\nhttps\ning\ninterpretability\ninversed\njoss\nlabelled\nlabelling\nleptokurtic\nlm\nlme\nmeaned\nmesokurtic\nmidhinge\nmodelbased\nnanoparquet\nnd\nnoLD\nopenssl\npanelr\npartialization\nplatykurtic\npoorman\npre\npx\nreadr\nreadxl\nrelevel\nrio\nrowid\nrstanarm\nsd\nstackexchange\ntailedness\nth\ntibble\ntibbles\ntidyverse\nunitless\nunstored\nvisualisation\nwikipedia\nwinsorization\nwinsorize\nwinsorized\nwinsorizes\nwinsorizing\nzscore\n"
  },
  {
    "path": "man/adjust.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/adjust.R\n\\name{adjust}\n\\alias{adjust}\n\\alias{data_adjust}\n\\title{Adjust data for the effect of other variable(s)}\n\\usage{\nadjust(\n  data,\n  effect = NULL,\n  select = is.numeric,\n  exclude = NULL,\n  multilevel = FALSE,\n  additive = FALSE,\n  bayesian = FALSE,\n  keep_intercept = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE\n)\n\ndata_adjust(\n  data,\n  effect = NULL,\n  select = is.numeric,\n  exclude = NULL,\n  multilevel = FALSE,\n  additive = FALSE,\n  bayesian = FALSE,\n  keep_intercept = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{effect}{Character vector of column names to be adjusted for (regressed\nout). If \\code{NULL} (the default), all variables will be selected.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. 
If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{multilevel}{If \\code{TRUE}, the factors are included as random factors.\nElse, if \\code{FALSE} (default), they are included as fixed effects in the\nsimple regression model.}\n\n\\item{additive}{If \\code{TRUE}, continuous variables as included as smooth terms\nin additive models. The goal is to regress-out potential non-linear\neffects.}\n\n\\item{bayesian}{If \\code{TRUE}, the models are fitted under the Bayesian framework\nusing \\code{rstanarm}.}\n\n\\item{keep_intercept}{If \\code{FALSE} (default), the intercept of the model is\nre-added. This avoids the centering around 0 that happens by default\nwhen regressing out another variable (see the examples below for a\nvisual representation of this).}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA data frame comparable to \\code{data}, with adjusted variables.\n}\n\\description{\nThis function can be used to adjust the data for the effect of other\nvariables present in the dataset. 
It is based on an underlying fitting of\nregressions models, allowing for quite some flexibility, such as including\nfactors as random effects in mixed models (multilevel partialization),\ncontinuous variables as smooth terms in general additive models (non-linear\npartialization) and/or fitting these models under a Bayesian framework. The\nvalues returned by this function are the residuals of the regression models.\nNote that a regular correlation between two \"adjusted\" variables is\nequivalent to the partial correlation between them.\n}\n\\examples{\n\\dontshow{if (all(insight::check_if_installed(c(\"bayestestR\", \"rstanarm\", \"gamm4\"), quietly = TRUE))) withAutoprint(\\{ # examplesIf}\nadjusted_all <- adjust(attitude)\nhead(adjusted_all)\nadjusted_one <- adjust(attitude, effect = \"complaints\", select = \"rating\")\nhead(adjusted_one)\n\\donttest{\nadjust(attitude, effect = \"complaints\", select = \"rating\", bayesian = TRUE)\nadjust(attitude, effect = \"complaints\", select = \"rating\", additive = TRUE)\nattitude$complaints_LMH <- cut(attitude$complaints, 3)\nadjust(attitude, effect = \"complaints_LMH\", select = \"rating\", multilevel = TRUE)\n}\n\n# Generate data\ndata <- bayestestR::simulate_correlation(n = 100, r = 0.7)\ndata$V2 <- (5 * data$V2) + 20 # Add intercept\n\n# Adjust\nadjusted <- adjust(data, effect = \"V1\", select = \"V2\")\nadjusted_icpt <- adjust(data, effect = \"V1\", select = \"V2\", keep_intercept = TRUE)\n\n# Visualize\nplot(\n  data$V1, data$V2,\n  pch = 19, col = \"blue\",\n  ylim = c(min(adjusted$V2), max(data$V2)),\n  main = \"Original (blue), adjusted (green), and adjusted - intercept kept (red) data\"\n)\nabline(lm(V2 ~ V1, data = data), col = \"blue\")\npoints(adjusted$V1, adjusted$V2, pch = 19, col = \"green\")\nabline(lm(V2 ~ V1, data = adjusted), col = \"green\")\npoints(adjusted_icpt$V1, adjusted_icpt$V2, pch = 19, col = \"red\")\nabline(lm(V2 ~ V1, data = adjusted_icpt), col = \"red\")\n\\dontshow{\\}) # 
examplesIf}\n}\n"
  },
  {
    "path": "man/as.prop.table.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_tabulate.R\n\\name{as.prop.table}\n\\alias{as.prop.table}\n\\alias{as.prop.table.datawizard_crosstab}\n\\alias{as.data.frame.datawizard_tables}\n\\alias{as.table.datawizard_table}\n\\title{Convert a crosstable to a frequency or a propensity table}\n\\usage{\nas.prop.table(x, ...)\n\n\\method{as.prop.table}{datawizard_crosstab}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...)\n\n\\method{as.data.frame}{datawizard_tables}(\n  x,\n  row.names = NULL,\n  optional = FALSE,\n  ...,\n  stringsAsFactors = FALSE,\n  add_total = FALSE\n)\n\n\\method{as.table}{datawizard_table}(x, remove_na = TRUE, simplify = FALSE, verbose = TRUE, ...)\n}\n\\arguments{\n\\item{x}{An object created by \\code{data_tabulate()}. It must be of class\n\\code{datawizard_crosstab} for \\code{as.prop.table()}.}\n\n\\item{...}{not used.}\n\n\\item{remove_na}{Logical, if \\code{FALSE}, missing values are included in the\nfrequency or crosstable, else missing values are omitted. Note that the\ndefault for the \\code{as.table()} method is \\code{remove_na = TRUE}, so that missing\nvalues are not included in the returned table, which makes more sense for\npost-processing of the table, e.g. using \\code{chisq.test()}.}\n\n\\item{simplify}{Logical, if \\code{TRUE}, the returned table is simplified to a\nsingle table object if there is only one frequency or contingency table\ninput. Else, always for multiple table inputs or when \\code{simplify = FALSE}, a\nlist of tables is returned. This is only relevant for the \\code{as.table()}\nmethods. To ensure consistent output, the default is \\code{FALSE}.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{row.names}{\\code{NULL} or a character vector giving the row\n    names for the data frame.  Missing values are not allowed.}\n\n\\item{optional}{logical. 
If \\code{TRUE}, setting row names and\n    converting column names (to syntactic names: see\n    \\code{\\link[base]{make.names}}) is optional.  Note that all of \\R's\n    \\pkg{base} package \\code{as.data.frame()} methods use\n    \\code{optional} only for column names treatment, basically with the\n    meaning of \\code{\\link[base]{data.frame}(*, check.names = !optional)}.\n    See also the \\code{make.names} argument of the \\code{matrix} method.}\n\n\\item{stringsAsFactors}{logical: should the character vector be converted\n    to a factor?}\n\n\\item{add_total}{For crosstables (i.e. when \\code{by} is not \\code{NULL}), a row and\ncolumn with the total N values are added to the data frame. \\code{add_total} has\nno effect in \\code{as.data.frame()} for simple frequency tables.}\n}\n\\description{\n\\code{as.prop.table()} is an S3 generic. It can be used on objects of class\n\\code{datawizard_crosstab} created by \\code{data_tabulate()} when it was run with the\narguments \\code{by} and \\code{proportions}.\n}\n\\examples{\ndata(efc)\n\n# Some cross tabulation\ncross <- data_tabulate(efc, select = \"e42dep\", by = \"c172code\", proportions = \"row\")\ncross\n\n# Convert to a propensity table\nas.prop.table(cross)\n\n# Convert to data.frame\nresult <- data_tabulate(efc, \"c172code\", by = \"e16sex\")\nas.data.frame(result)\nas.data.frame(result)$table\nas.data.frame(result, add_total = TRUE)$table\n\n# Convert to a table that can be passed to chisq.test()\n\nout <- data_tabulate(efc, \"c172code\", by = \"e16sex\")\n# we need to simplify the output, else we get a list of tables\ntbl <- as.table(out, simplify = TRUE)\ntbl\nsuppressWarnings(chisq.test(tbl))\n\n# apply chisq.test to each table\nout <- data_tabulate(efc, c(\"c172code\", \"e16sex\"))\nsuppressWarnings(lapply(as.table(out), chisq.test))\n\n# can also handle grouped data frames\nd <- data_group(mtcars, \"am\")\nx <- data_tabulate(d, \"cyl\", by = 
\"gear\")\nas.table(x)\n}\n\\seealso{\n\\link{data_tabulate}\n}\n"
  },
  {
    "path": "man/assign_labels.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/assign_labels.R\n\\name{assign_labels}\n\\alias{assign_labels}\n\\alias{assign_labels.numeric}\n\\alias{assign_labels.data.frame}\n\\title{Assign variable and value labels}\n\\usage{\nassign_labels(x, ...)\n\n\\method{assign_labels}{numeric}(x, variable = NULL, values = NULL, ...)\n\n\\method{assign_labels}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  values = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame, factor or vector.}\n\n\\item{...}{Currently not used.}\n\n\\item{variable}{The variable label as string.}\n\n\\item{values}{The value labels as (named) character vector. If \\code{values} is\n\\emph{not} a named vector, the length of labels must be equal to the length of\nunique values. For a named vector, the left-hand side (LHS) is the value in\n\\code{x}, the right-hand side (RHS) the associated value label. Non-matching\nlabels are omitted.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. 
If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA labelled variable, or a data frame of labelled variables.\n}\n\\description{\nAssign variable and values labels to a variable or variables in a data frame.\nLabels are stored as attributes (\\code{\"label\"} for variable labels and \\code{\"labels\"})\nfor value labels.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nx <- 1:3\n# labelling by providing required number of labels\nassign_labels(\n  x,\n  variable = \"My x\",\n  values = c(\"one\", \"two\", \"three\")\n)\n\n# labelling using named vectors\ndata(iris)\nout <- assign_labels(\n  iris$Species,\n  variable = \"Labelled Species\",\n  values = c(`setosa` = \"Spec1\", `versicolor` = \"Spec2\", `virginica` = \"Spec3\")\n)\nstr(out)\n\n# data frame example\nout <- assign_labels(\n  iris,\n  select = \"Species\",\n  variable = \"Labelled Species\",\n  values = c(`setosa` = \"Spec1\", `versicolor` = \"Spec2\", `virginica` = \"Spec3\")\n)\nstr(out$Species)\n\n# Partial labelling\nx <- 1:5\nassign_labels(\n  x,\n  variable = \"My x\",\n  values = c(`1` = \"lowest\", `5` = \"highest\")\n)\n}\n"
  },
  {
    "path": "man/categorize.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/categorize.R\n\\name{categorize}\n\\alias{categorize}\n\\alias{categorize.numeric}\n\\alias{categorize.data.frame}\n\\title{Recode (or \"cut\" / \"bin\") data into groups of values.}\n\\usage{\ncategorize(x, ...)\n\n\\method{categorize}{numeric}(\n  x,\n  split = \"median\",\n  n_groups = NULL,\n  range = NULL,\n  lowest = 1,\n  breaks = \"exclusive\",\n  labels = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{categorize}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  split = \"median\",\n  n_groups = NULL,\n  range = NULL,\n  lowest = 1,\n  breaks = \"exclusive\",\n  labels = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, numeric vector or factor.}\n\n\\item{...}{not used.}\n\n\\item{split}{Character vector, indicating at which breaks to split variables,\nor numeric values with values indicating breaks. If character, may be one\nof \\code{\"median\"}, \\code{\"mean\"}, \\code{\"quantile\"}, \\code{\"equal_length\"}, or \\code{\"equal_range\"}.\n\\code{\"median\"} or \\code{\"mean\"} will return dichotomous variables, split at their\nmean or median, respectively. \\code{\"quantile\"} and \\code{\"equal_length\"} will split\nthe variable into \\code{n_groups} groups, where each group refers to an interval\nof a specific range of values. Thus, the length of each interval will be\nbased on the number of groups. \\code{\"equal_range\"} also splits the variable\ninto multiple groups, however, the length of the interval is given, and\nthe number of resulting groups (and hence, the number of breaks) will be\ndetermined by how many intervals can be generated, based on the full range\nof the variable.}\n\n\\item{n_groups}{If \\code{split} is \\code{\"quantile\"} or \\code{\"equal_length\"}, this defines\nthe number of requested groups (i.e. 
resulting number of levels or values)\nfor the recoded variable(s). \\code{\"quantile\"} will define intervals based\non the distribution of the variable, while \\code{\"equal_length\"} tries to\ndivide the range of the variable into pieces of equal length.}\n\n\\item{range}{If \\code{split = \"equal_range\"}, this defines the range of values\nthat are recoded into a new value.}\n\n\\item{lowest}{Minimum value of the recoded variable(s). If \\code{NULL} (the default),\nfor numeric variables, the minimum of the original input is preserved. For\nfactors, the default minimum is \\code{1}. For \\code{split = \"equal_range\"}, the\ndefault minimum is always \\code{1}, unless specified otherwise in \\code{lowest}.}\n\n\\item{breaks}{Character, indicating whether breaks for categorizing data are\n\\code{\"inclusive\"} (values indicate the \\emph{upper} bound of the \\emph{previous} group or\ninterval) or \\code{\"exclusive\"} (values indicate the \\emph{lower} bound of the \\emph{next}\ngroup or interval to begin). Use \\code{labels = \"range\"} to make this behaviour\neasier to see.}\n\n\\item{labels}{Character vector of value labels. If not \\code{NULL}, \\code{categorize()}\nwill return factors instead of numeric variables, with \\code{labels} used\nfor labelling the factor levels. Can also be \\code{\"mean\"}, \\code{\"median\"},\n\\code{\"range\"} or \\code{\"observed\"} for a factor with labels as the mean/median,\nthe requested range (even if not all values of that range are present in\nthe data) or observed range (range of the actual recoded values) of each\ngroup. See 'Examples'.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. 
Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. 
\\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. 
When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\n\\code{x}, recoded into groups. By default \\code{x} is numeric, unless \\code{labels}\nis specified. In this case, a factor is returned, where the factor levels\n(i.e. recoded groups) are labelled accordingly.\n}\n\\description{\nThis function divides the range of variables into intervals and recodes\nthe values inside these intervals according to their related interval.\nIt is basically a wrapper around base R's \\code{cut()}, providing a simplified\nand more accessible way to define the interval breaks (cut-off values).\n}\n\\section{Splits and breaks (cut-off values)}{\nBreaks are by default \\emph{exclusive}, this means that these values indicate\nthe lower bound of the next group or interval to begin. Take a simple\nexample, a numeric variable with values from 1 to 9. The median would be 5,\nthus the first interval ranges from 1-4 and is recoded into 1, while 5-9\nwould turn into 2 (compare \\code{cbind(1:9, categorize(1:9))}). 
The same variable,\nusing \\code{split = \"quantile\"} and \\code{n_groups = 3} would define breaks at 3.67\nand 6.33 (see \\code{quantile(1:9, probs = c(1/3, 2/3))}), which means that values\nfrom 1 to 3 belong to the first interval and are recoded into 1 (because\nthe next interval starts at 3.67), 4 to 6 into 2 and 7 to 9 into 3.\n\nThe opposite behaviour can be achieved using \\code{breaks = \"inclusive\"}, in which\ncase these values indicate the upper bound of the previous group or interval.\n}\n\n\\section{Recoding into groups with equal size or range}{\n\\code{split = \"equal_length\"} and \\code{split = \"equal_range\"} try to divide the\nrange of \\code{x} into intervals of similar (or same) length. The difference is\nthat \\code{split = \"equal_length\"} will divide the range of \\code{x} into \\code{n_groups}\npieces and thereby defines the intervals used as breaks (hence, it is\nequivalent to \\code{cut(x, breaks = n_groups)}), while  \\code{split = \"equal_range\"}\nwill cut \\code{x} into intervals that all have the length of \\code{range}, where the\nfirst interval by default starts at \\code{1}. The lowest (or starting) value\nof that interval can be defined using the \\code{lowest} argument.\n}\n\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nset.seed(123)\nx <- sample(1:10, size = 50, replace = TRUE)\n\ntable(x)\n\n# by default, at median\ntable(categorize(x))\n\n# into 3 groups, based on distribution (quantiles)\ntable(categorize(x, split = \"quantile\", n_groups = 3))\n\n# into 3 groups, user-defined break\ntable(categorize(x, split = c(3, 5)))\n\nset.seed(123)\nx <- sample(1:100, size = 500, replace = TRUE)\n\n# into 5 groups, try to recode into intervals of similar length,\n# i.e. the range within groups is the same for all groups\ntable(categorize(x, split = \"equal_length\", n_groups = 5))\n\n# into 5 groups, try to return same range within groups\n# i.e. 1-20, 21-40, 41-60, etc. Since the range of \"x\" is\n# 1-100, and we have a range of 20, this results into 5\n# groups, and thus is for this particular case identical\n# to the previous result.\ntable(categorize(x, split = \"equal_range\", range = 20))\n\n# return factor with value labels instead of numeric value\nset.seed(123)\nx <- sample(1:10, size = 30, replace = TRUE)\ncategorize(x, \"equal_length\", n_groups = 3)\ncategorize(x, \"equal_length\", n_groups = 3, labels = c(\"low\", \"mid\", \"high\"))\n\n# cut numeric into groups with the mean or median as a label name\nx <- sample(1:10, size = 30, replace = TRUE)\ncategorize(x, \"equal_length\", n_groups = 3, labels = \"mean\")\ncategorize(x, \"equal_length\", n_groups = 3, labels = \"median\")\n\n# cut numeric into groups with the requested range as a label name\n# each category has the same range, and labels indicate this range\ncategorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"range\")\n# in this example, each category has the same range, 
but labels only refer\n# to the ranges of the actual values (present in the data) inside each group\ncategorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"observed\")\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/center.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/center.R\n\\name{center}\n\\alias{center}\n\\alias{centre}\n\\alias{center.numeric}\n\\alias{center.data.frame}\n\\title{Centering (Grand-Mean Centering)}\n\\usage{\ncenter(x, ...)\n\ncentre(x, ...)\n\n\\method{center}{numeric}(\n  x,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{center}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  force = FALSE,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  append = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, a (numeric or character) vector or a factor.}\n\n\\item{...}{Currently not used.}\n\n\\item{robust}{Logical, if \\code{TRUE}, centering is done by subtracting the\nmedian from the variables. If \\code{FALSE}, variables are centered by\nsubtracting the mean.}\n\n\\item{weights}{Can be \\code{NULL} (for no weighting), or:\n\\itemize{\n\\item For data frames: a numeric vector of weights, or a character of the\nname of a column in the \\code{data.frame} that contains the weights.\n\\item For numeric vectors: a numeric vector of weights.\n}}\n\n\\item{reference}{A data frame or variable from which the centrality and\ndeviation will be computed instead of from the input variable. Useful for\nstandardizing a subset or new data according to another data frame.}\n\n\\item{center}{Numeric value, which can be used as alternative to\n\\code{reference} to define a reference centrality. If \\code{center} is of length 1,\nit will be recycled to match the length of selected variables for centering.\nElse, \\code{center} must be of same length as the number of selected variables.\nValues in \\code{center} will be matched to selected variables in the provided\norder, unless a named vector is given. 
In this case, names are matched\nagainst the names of the selected variables.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. 
return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{force}{Logical, if \\code{TRUE}, forces centering of factors as\nwell. Factors are converted to numerical values, with the lowest level\nbeing the value \\code{1} (unless the factor has numeric levels, which are\nconverted to the corresponding numeric value).}\n\n\\item{remove_na}{How should missing values (\\code{NA}) be treated: if \\code{\"none\"}\n(default): each column's standardization is done separately, ignoring\n\\code{NA}s. Else, rows with \\code{NA} in the columns selected with \\code{select} /\n\\code{exclude} (\\code{\"selected\"}) or in all columns (\\code{\"all\"}) are dropped before\nstandardization, and the resulting data frame does not include these cases.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, centered variables get new\ncolumn names (with the suffix \\code{\"_c\"}) and are appended (column bind) to \\code{x},\nthus returning both the original and the centered variables. 
If \\code{FALSE},\noriginal variables in \\code{x} will be overwritten by their centered versions.\nIf a character value, centered variables are appended with new column\nnames (using the defined suffix) to the original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nThe centered variables.\n}\n\\description{\nPerforms a grand-mean centering of data.\n}\n\\note{\n\\strong{Difference between centering and standardizing}: Standardized variables\nare computed by subtracting the mean of the variable and then dividing it by\nthe standard deviation, while centering variables involves only the\nsubtraction.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\ndata(iris)\n\n# entire data frame or a vector\nhead(iris$Sepal.Width)\nhead(center(iris$Sepal.Width))\nhead(center(iris))\nhead(center(iris, force = TRUE))\n\n# only the selected columns from a data frame\ncenter(anscombe, select = c(\"x1\", \"x3\"))\ncenter(anscombe, exclude = c(\"x1\", \"x3\"))\n\n# centering with reference center and scale\nd <- data.frame(\n  a = c(-2, -1, 0, 1, 2),\n  b = c(3, 4, 5, 6, 7)\n)\n\n# default centering at mean\ncenter(d)\n\n# centering, using 0 as mean\ncenter(d, center = 0)\n\n# centering, using -5 as mean\ncenter(d, center = -5)\n}\n\\seealso{\nIf centering within-clusters (instead of grand-mean centering)\nis required, see \\code{\\link[=demean]{demean()}}. For standardizing, see \\code{\\link[=standardize]{standardize()}}, and\n\\code{\\link[=makepredictcall.dw_transformer]{makepredictcall.dw_transformer()}} for use in model formulas.\n}\n"
  },
  {
    "path": "man/coef_var.Rd",
"content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/descriptives.R\n\\name{coef_var}\n\\alias{coef_var}\n\\alias{distribution_cv}\n\\alias{distribution_coef_var}\n\\alias{coef_var.numeric}\n\\title{Compute the coefficient of variation}\n\\usage{\ncoef_var(x, ...)\n\ndistribution_coef_var(x, ...)\n\n\\method{coef_var}{numeric}(\n  x,\n  mu = NULL,\n  sigma = NULL,\n  method = c(\"standard\", \"unbiased\", \"median_mad\", \"qcd\"),\n  trim = 0,\n  remove_na = FALSE,\n  n = NULL,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A numeric vector of ratio scale (see details), or vector of values that can be coerced to one.}\n\n\\item{...}{Further arguments passed to computation functions.}\n\n\\item{mu}{A numeric vector of mean values to use to compute the coefficient\nof variation. If supplied, \\code{x} is not used to compute the mean.}\n\n\\item{sigma}{A numeric vector of standard deviation values to use to compute the coefficient\nof variation. If supplied, \\code{x} is not used to compute the SD.}\n\n\\item{method}{Method to use to compute the CV. Can be \\code{\"standard\"} to compute\nby dividing the standard deviation by the mean, \\code{\"unbiased\"} for the\nunbiased estimator for normally distributed data, or one of two robust\nalternatives: \\code{\"median_mad\"} to divide the median by the \\code{\\link[stats:mad]{stats::mad()}},\nor \\code{\"qcd\"} (quartile coefficient of dispersion, interquartile range divided\nby the sum of the quartiles [twice the midhinge]: \\eqn{(Q_3 - Q_1)/(Q_3 + Q_1)}).}\n\n\\item{trim}{the fraction (0 to 0.5) of values to be trimmed from\neach end of \\code{x} before the mean and standard deviation (or other measures)\nare computed. Values of \\code{trim} outside the range of (0 to 0.5) are taken\nas the nearest endpoint.}\n\n\\item{remove_na}{Logical. 
Should \\code{NA} values be removed before computing (\\code{TRUE})\nor not (\\code{FALSE}, default)?}\n\n\\item{n}{If \\code{method = \"unbiased\"} and both \\code{mu} and \\code{sigma} are provided (not\ncomputed from \\code{x}), what sample size to use to adjust the computed CV\nfor small-sample bias?}\n}\n\\value{\nThe computed coefficient of variation for \\code{x}.\n}\n\\description{\nCompute the coefficient of variation (CV, ratio of the standard deviation to\nthe mean, \\eqn{\\sigma/\\mu}) for a set of numeric values.\n}\n\\details{\nCV is only applicable to values taken on a ratio scale: values that have a\n\\emph{fixed} meaningfully defined 0 (which is either the lowest or highest\npossible value), and that ratios between them are interpretable. For example,\nhow many sandwiches have I eaten this week? 0 means \"none\" and 20 sandwiches\nis 4 times more than 5 sandwiches. If I were to center the number of\nsandwiches, it will no longer be on a ratio scale (0 is no longer \"none\", it is the\nmean, and the ratio between 4 and -2 is not meaningful). Scaling a ratio\nscale still results in a ratio scale. So I can redefine \"how many half\nsandwiches did I eat this week\" (= sandwiches * 0.5) and 0 would still mean\n\"none\", and 20 half-sandwiches is still 4 times more than 5 half-sandwiches.\n\nThis means that CV is \\strong{NOT} invariant to shifting, but it is to scaling:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{sandwiches <- c(0, 4, 15, 0, 0, 5, 2, 7)\ncoef_var(sandwiches)\n#> [1] 1.239094\n\ncoef_var(sandwiches / 2) # same\n#> [1] 1.239094\n\ncoef_var(sandwiches + 4) # different! 0 is no longer meaningful!\n#> [1] 0.6290784\n}\\if{html}{\\out{</div>}}\n}\n\\examples{\ncoef_var(1:10)\ncoef_var(c(1:10, 100), method = \"median_mad\")\ncoef_var(c(1:10, 100), method = \"qcd\")\ncoef_var(mu = 10, sigma = 20)\ncoef_var(mu = 10, sigma = 20, method = \"unbiased\", n = 30)\n}\n"
  },
  {
    "path": "man/coerce_to_numeric.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/to_numeric.R\n\\name{coerce_to_numeric}\n\\alias{coerce_to_numeric}\n\\title{Convert to Numeric (if possible)}\n\\usage{\ncoerce_to_numeric(x)\n}\n\\arguments{\n\\item{x}{A vector to be converted.}\n}\n\\value{\nNumeric vector (if possible)\n}\n\\description{\nTries to convert vector to numeric if possible (if no warnings or errors).\nOtherwise, leaves it as is.\n}\n\\examples{\ncoerce_to_numeric(c(\"1\", \"2\"))\ncoerce_to_numeric(c(\"1\", \"2\", \"A\"))\n}\n"
  },
  {
    "path": "man/colnames.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils-cols.R\n\\name{row_to_colnames}\n\\alias{row_to_colnames}\n\\alias{colnames_to_row}\n\\title{Tools for working with column names}\n\\usage{\nrow_to_colnames(x, row = 1, na_prefix = \"x\", verbose = TRUE)\n\ncolnames_to_row(x, prefix = \"x\")\n}\n\\arguments{\n\\item{x}{A data frame.}\n\n\\item{row}{Row to use as column names.}\n\n\\item{na_prefix}{Prefix to give to the column name if the row has an \\code{NA}.\nDefault is 'x', and it will be incremented at each \\code{NA} (\\code{x1}, \\code{x2}, etc.).}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{prefix}{Prefix to give to the column name. Default is 'x', and it will\nbe incremented at each column (\\code{x1}, \\code{x2}, etc.).}\n}\n\\value{\n\\code{row_to_colnames()} and \\code{colnames_to_row()} both return a data frame.\n}\n\\description{\nTools for working with column names\n}\n\\examples{\n# Convert a row to column names --------------------------------\ntest <- data.frame(\n  a = c(\"iso\", 2, 5),\n  b = c(\"year\", 3, 6),\n  c = c(\"value\", 5, 7)\n)\ntest\nrow_to_colnames(test)\n\n# Convert column names to row --------------------------------\ntest <- data.frame(\n  ARG = c(\"BRA\", \"FRA\"),\n  `1960` = c(1960, 1960),\n  `2000` = c(2000, 2000)\n)\ntest\ncolnames_to_row(test)\n\n}\n"
  },
  {
    "path": "man/contr.deviation.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/contrs.R\n\\name{contr.deviation}\n\\alias{contr.deviation}\n\\title{Deviation Contrast Matrix}\n\\usage{\ncontr.deviation(n, base = 1, contrasts = TRUE, sparse = FALSE)\n}\n\\arguments{\n\\item{n}{a vector of levels for a factor, or the number of levels.}\n\n\\item{base}{an integer specifying which group is considered the\n    baseline group. Ignored if \\code{contrasts} is \\code{FALSE}.}\n\n\\item{contrasts}{a logical indicating whether contrasts should be\n    computed.}\n\n\\item{sparse}{logical indicating if the result should be sparse\n    (of class \\code{\\link[Matrix:dgCMatrix-class]{dgCMatrix}}), using\n    package \\href{https://CRAN.R-project.org/package=Matrix}{\\pkg{Matrix}}.}\n}\n\\description{\nBuild a deviation contrast matrix, a type of \\emph{effects contrast} matrix.\n}\n\\details{\nIn effects coding, unlike treatment/dummy coding\n(\\code{\\link[stats:contrast]{stats::contr.treatment()}}), each contrast sums to 0. In regressions models,\nthis results in an intercept that represents the (unweighted) average of the\ngroup means. In ANOVA settings, this also guarantees that lower order effects\nrepresent \\emph{main} effects (and not \\emph{simple} or \\emph{conditional} effects, as is\nthe case when using R's default \\code{\\link[stats:contrast]{stats::contr.treatment()}}).\n\\cr\\cr\nDeviation coding (\\code{contr.deviation}) is a type of effects coding. With\ndeviation coding, the coefficients for factor variables are interpreted as\nthe difference of each factor level from the base level (this is the same\ninterpretation as with treatment/dummy coding). 
For example, for a factor\n\\code{group} with levels \"A\", \"B\", and \"C\", with \\code{contr.deviation}, the intercept\nrepresents the overall mean (average of the group means for the 3 groups),\nand the coefficients \\code{groupB} and \\code{groupC} represent the differences between\nthe A group mean and the B and C group means, respectively.\n\\cr\\cr\nSum coding (\\code{\\link[stats:contrast]{stats::contr.sum()}}) is another type of effects coding. With sum\ncoding, the coefficients for factor variables are interpreted as the\ndifference of each factor level from \\strong{the grand (across-groups) mean}. For\nexample, for a factor \\code{group} with levels \"A\", \"B\", and \"C\", with\n\\code{contr.sum}, the intercept represents the overall mean (average of the group\nmeans for the 3 groups), and the coefficients \\code{group1} and \\code{group2} represent\nthe differences of the\n\\strong{A} and \\strong{B} group means from the overall mean, respectively.\n}\n\\examples{\n\\dontshow{if (!identical(Sys.getenv(\"IN_PKGDOWN\"), \"true\")) withAutoprint(\\{ # examplesIf}\n\\donttest{\ndata(\"mtcars\")\n\nmtcars <- data_modify(mtcars, cyl = factor(cyl))\n\nc.treatment <- cbind(Intercept = 1, contrasts(mtcars$cyl))\nsolve(c.treatment)\n#>            4 6 8\n#> Intercept  1 0 0  # mean of the 1st level\n#> 6         -1 1 0  # 2nd level - 1st level\n#> 8         -1 0 1  # 3rd level - 1st level\n\ncontrasts(mtcars$cyl) <- contr.sum\nc.sum <- cbind(Intercept = 1, contrasts(mtcars$cyl))\nsolve(c.sum)\n#>                4      6      8\n#> Intercept  0.333  0.333  0.333   # overall mean\n#>            0.667 -0.333 -0.333   # deviation of 1st from overall mean\n#>           -0.333  0.667 -0.333   # deviation of 2nd from overall mean\n\n\ncontrasts(mtcars$cyl) <- contr.deviation\nc.deviation <- cbind(Intercept = 1, contrasts(mtcars$cyl))\nsolve(c.deviation)\n#>                4     6     8\n#> Intercept  0.333 0.333 0.333   # overall mean\n#> 6         -1.000 1.000 0.000   # 
2nd level - 1st level\n#> 8         -1.000 0.000 1.000   # 3rd level - 1st level\n\n## With Interactions -----------------------------------------\nmtcars <- data_modify(mtcars, am = C(am, contr = contr.deviation))\nmtcars <- data_arrange(mtcars, select = c(\"cyl\", \"am\"))\n\nmm <- unique(model.matrix(~ cyl * am, data = mtcars))\nrownames(mm) <- c(\n  \"cyl4.am0\", \"cyl4.am1\", \"cyl6.am0\",\n  \"cyl6.am1\", \"cyl8.am0\", \"cyl8.am1\"\n)\n\nsolve(mm)\n#>             cyl4.am0 cyl4.am1 cyl6.am0 cyl6.am1 cyl8.am0 cyl8.am1\n#> (Intercept)    0.167    0.167    0.167    0.167    0.167    0.167  # overall mean\n#> cyl6          -0.500   -0.500    0.500    0.500    0.000    0.000  # cyl MAIN eff: 2nd - 1st\n#> cyl8          -0.500   -0.500    0.000    0.000    0.500    0.500  # cyl MAIN eff: 2nd - 1st\n#> am1           -0.333    0.333   -0.333    0.333   -0.333    0.333  # am MAIN eff\n#> cyl6:am1       1.000   -1.000   -1.000    1.000    0.000    0.000\n#> cyl8:am1       1.000   -1.000    0.000    0.000   -1.000    1.000\n}\n\\dontshow{\\}) # examplesIf}\n}\n\\seealso{\n\\code{\\link[stats:contrast]{stats::contr.sum()}}\n}\n"
  },
  {
    "path": "man/convert_na_to.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/convert_na_to.R\n\\name{convert_na_to}\n\\alias{convert_na_to}\n\\alias{convert_na_to.numeric}\n\\alias{convert_na_to.character}\n\\alias{convert_na_to.data.frame}\n\\title{Replace missing values in a variable or a data frame.}\n\\usage{\nconvert_na_to(x, ...)\n\n\\method{convert_na_to}{numeric}(x, replacement = NULL, verbose = TRUE, ...)\n\n\\method{convert_na_to}{character}(x, replacement = NULL, verbose = TRUE, ...)\n\n\\method{convert_na_to}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  replacement = NULL,\n  replace_num = replacement,\n  replace_char = replacement,\n  replace_fac = replacement,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A numeric, factor, or character vector, or a data frame.}\n\n\\item{...}{Not used.}\n\n\\item{replacement}{Numeric or character value that will be used to\nreplace \\code{NA}.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. 
If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{replace_num}{Value to replace \\code{NA} when variable is of type numeric.}\n\n\\item{replace_char}{Value to replace \\code{NA} when variable is of type character.}\n\n\\item{replace_fac}{Value to replace \\code{NA} when variable is of type factor.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\n\\code{x}, where \\code{NA} values are replaced by \\code{replacement}.\n}\n\\description{\nReplace missing values in a variable or a data frame.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\n# Convert NA to 0 in a numeric vector\nconvert_na_to(\n  c(9, 3, NA, 2, 3, 1, NA, 8),\n  replacement = 0\n)\n\n# Convert NA to \"missing\" in a character vector\nconvert_na_to(\n  c(\"a\", NA, \"d\", \"z\", NA, \"t\"),\n  replacement = \"missing\"\n)\n\n### For data frames\n\ntest_df <- data.frame(\n  x = c(1, 2, NA),\n  x2 = c(4, 5, NA),\n  y = c(\"a\", \"b\", NA)\n)\n\n# Convert all NA to 0 in numeric variables, and all NA to \"missing\" in\n# character variables\nconvert_na_to(\n  test_df,\n  replace_num = 0,\n  replace_char = \"missing\"\n)\n\n# Convert a specific variable in the data frame\nconvert_na_to(\n  test_df,\n  replace_num = 0,\n  replace_char = \"missing\",\n  select = \"x\"\n)\n\n# Convert all variables starting with \"x\"\nconvert_na_to(\n  test_df,\n  replace_num = 0,\n  replace_char = \"missing\",\n  select = starts_with(\"x\")\n)\n\n# Convert NA to 1 in variable 'x2' and to 0 in all other numeric\n# variables\nconvert_na_to(\n  test_df,\n  replace_num = 0,\n  select = list(x2 = 1)\n)\n\n}\n"
  },
  {
    "path": "man/convert_to_na.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/convert_to_na.R\n\\name{convert_to_na}\n\\alias{convert_to_na}\n\\alias{convert_to_na.numeric}\n\\alias{convert_to_na.factor}\n\\alias{convert_to_na.data.frame}\n\\title{Convert non-missing values in a variable into missing values.}\n\\usage{\nconvert_to_na(x, ...)\n\n\\method{convert_to_na}{numeric}(x, na = NULL, verbose = TRUE, ...)\n\n\\method{convert_to_na}{factor}(x, na = NULL, drop_levels = FALSE, verbose = TRUE, ...)\n\n\\method{convert_to_na}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  na = NULL,\n  drop_levels = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A vector, factor or a data frame.}\n\n\\item{...}{Not used.}\n\n\\item{na}{Numeric, character vector or logical (or a list of numeric, character\nvectors or logicals) with values that should be converted to \\code{NA}. Numeric\nvalues applied to numeric vectors, character values are used for factors,\ncharacter vectors or date variables, and logical values for logical vectors.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{drop_levels}{Logical, for factors, when specific levels are replaced\nby \\code{NA}, should unused levels be dropped?}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. 
See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. 
\\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\n\\code{x}, where all values in \\code{na} are converted to \\code{NA}.\n}\n\\description{\nConvert non-missing values in a variable into missing values.\n}\n\\examples{\nx <- sample(1:6, size = 30, replace = TRUE)\nx\n# values 4 and 5 to NA\nconvert_to_na(x, na = 4:5)\n\n# data frames\nset.seed(123)\nx <- data.frame(\n  a = sample(1:6, size = 20, replace = TRUE),\n  b = sample(letters[1:6], size = 20, replace = TRUE),\n  c = sample(c(30:33, 99), size = 20, replace = TRUE)\n)\n# for all numerics, convert 5 to NA. Character/factor will be ignored.\nconvert_to_na(x, na = 5)\n\n# for numerics, 5 to NA, for character/factor, \"f\" to NA\nconvert_to_na(x, na = list(6, \"f\"))\n\n# select specific variables\nconvert_to_na(x, select = c(\"a\", \"b\"), na = list(6, \"f\"))\n}\n"
  },
  {
    "path": "man/data_arrange.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_arrange.R\n\\name{data_arrange}\n\\alias{data_arrange}\n\\title{Arrange rows by column values}\n\\usage{\ndata_arrange(data, select = NULL, safe = TRUE)\n}\n\\arguments{\n\\item{data}{A data frame, or an object that can be coerced to a data frame.}\n\n\\item{select}{Character vector of column names. Use a dash just before column\nname to arrange in decreasing order, for example \\code{\"-x1\"}.}\n\n\\item{safe}{Do not throw an error if one of the variables specified doesn't\nexist.}\n}\n\\value{\nA data frame.\n}\n\\description{\n\\code{data_arrange()} orders the rows of a data frame by the values of selected\ncolumns.\n}\n\\examples{\n\n# Arrange using several variables\ndata_arrange(head(mtcars), c(\"gear\", \"carb\"))\n\n# Arrange in decreasing order\ndata_arrange(head(mtcars), \"-carb\")\n\n# Throw an error if one of the variables specified doesn't exist\ntry(data_arrange(head(mtcars), c(\"gear\", \"foo\"), safe = FALSE))\n}\n"
  },
  {
    "path": "man/data_codebook.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_codebook.R\n\\name{data_codebook}\n\\alias{data_codebook}\n\\alias{print_html.data_codebook}\n\\alias{display.data_codebook}\n\\title{Generate a codebook of a data frame.}\n\\usage{\ndata_codebook(\n  data,\n  select = NULL,\n  exclude = NULL,\n  variable_label_width = NULL,\n  value_label_width = NULL,\n  max_values = 10,\n  range_at = 6,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\n\\method{print_html}{data_codebook}(\n  x,\n  font_size = \"100\\%\",\n  line_padding = 3,\n  row_color = \"#eeeeee\",\n  ...\n)\n\n\\method{display}{data_codebook}(\n  object,\n  format = \"markdown\",\n  font_size = \"100\\%\",\n  line_padding = 3,\n  row_color = \"#eeeeee\",\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame, or an object that can be coerced to a data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{variable_label_width}{Length of variable labels. Longer labels will be\nwrapped at \\code{variable_label_width} chars. 
If \\code{NULL}, longer labels will not\nbe split into multiple lines. Only applies to \\emph{labelled data}.}\n\n\\item{value_label_width}{Length of value labels. Longer labels will be\nshortened, where the remaining part is truncated. Only applies to\n\\emph{labelled data} or factor levels.}\n\n\\item{max_values}{Number of maximum values that should be displayed. Can be\nused to avoid too many rows when variables have lots of unique values.}\n\n\\item{range_at}{Indicates how many unique values in a numeric vector are\nneeded in order to print a range for that variable instead of a frequency\ntable for all numeric values. Can be useful if the data contains numeric\nvariables with only a few unique values and where full frequency tables\ninstead of value ranges should be displayed.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings and messages on or off.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{x}{A (grouped) data frame, a vector or a statistical model (for\n\\code{unstandardize()} cannot be a model).}\n\n\\item{font_size}{For HTML tables, the font size.}\n\n\\item{line_padding}{For HTML tables, the distance (in pixel) between lines.}\n\n\\item{row_color}{For HTML tables, the fill color for odd rows.}\n\n\\item{object}{An object returned by \\code{data_tabulate()}.}\n\n\\item{format}{String, indicating the output format. Can be \\code{\"markdown\"}\n\\code{\"html\"}, or \\code{\"tt\"}. \\code{format = \"html\"} create an HTML table using the \\emph{gt}\npackage. \\code{format = \"tt\"} creates a \\code{tinytable} object, which is either\nprinted as markdown or HTML table, depending on the environment. See\n\\code{\\link[insight:export_table]{insight::export_table()}} for details.}\n}\n\\value{\nA formatted data frame, summarizing the content of the data frame.\nReturned columns include the column index of the variables in the original\ndata frame (\\code{ID}), column name, variable label (if data is labelled), type\nof variable, number of missing values, unique values (or value range),\nvalue labels (for labelled data), and a frequency table (N for each value).\nMost columns are formatted as character vectors.\n}\n\\description{\n\\code{data_codebook()} generates codebooks from data frames, i.e. 
overviews\nof all variables and some more information about each variable (like\nlabels, values or value range, frequencies, amount of missing values).\n}\n\\note{\nThere are methods to \\code{print()} the data frame in a nicer output, as\nwell methods for printing in markdown or HTML format (\\code{print_md()} and\n\\code{print_html()}). The \\code{print()} method for text outputs passes arguments in\n\\code{...} to \\code{\\link[insight:export_table]{insight::export_table()}}.\n}\n\\examples{\ndata(iris)\ndata_codebook(iris, select = starts_with(\"Sepal\"))\n\ndata(efc)\ndata_codebook(efc)\n\n# shorten labels\ndata_codebook(efc, variable_label_width = 20, value_label_width = 15)\n\n# automatic range for numerics at more than 5 unique values\ndata(mtcars)\ndata_codebook(mtcars, select = starts_with(\"c\"))\n\n# force all values to be displayed\ndata_codebook(mtcars, select = starts_with(\"c\"), range_at = 100)\n}\n"
  },
  {
    "path": "man/data_duplicated.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_duplicated.R\n\\name{data_duplicated}\n\\alias{data_duplicated}\n\\title{Extract all duplicates}\n\\usage{\ndata_duplicated(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA dataframe, containing all duplicates.\n}\n\\description{\nExtract all duplicates, for visual inspection.\nNote that it also contains the first occurrence of future\nduplicates, unlike \\code{\\link[=duplicated]{duplicated()}} or \\code{\\link[dplyr:distinct]{dplyr::distinct()}}). Also\ncontains an additional column reporting the number of missing\nvalues for that row, to help in the decision-making when\nselecting which duplicates to keep.\n}\n\\examples{\ndf1 <- data.frame(\n  id = c(1, 2, 3, 1, 3),\n  year = c(2022, 2022, 2022, 2022, 2000),\n  item1 = c(NA, 1, 1, 2, 3),\n  item2 = c(NA, 1, 1, 2, 3),\n  item3 = c(NA, 1, 1, 2, 3)\n)\n\ndata_duplicated(df1, select = \"id\")\n\ndata_duplicated(df1, select = c(\"id\", \"year\"))\n\n# Filter to exclude duplicates\ndf2 <- df1[-c(1, 5), ]\ndf2\n\n}\n\\seealso{\n\\code{\\link[=data_unique]{data_unique()}}\n}\n\\keyword{duplicates}\n"
  },
  {
    "path": "man/data_extract.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_extract.R\n\\name{data_extract}\n\\alias{data_extract}\n\\alias{data_extract.data.frame}\n\\title{Extract one or more columns or elements from an object}\n\\usage{\ndata_extract(data, select, ...)\n\n\\method{data_extract}{data.frame}(\n  data,\n  select,\n  name = NULL,\n  extract = \"all\",\n  as_data_frame = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{The object to subset. Methods are currently available for data frames\nand data frame extensions (e.g., tibbles).}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{...}{For use by future methods.}\n\n\\item{name}{An optional argument that specifies the column to be used as\nnames for the vector elements after extraction. Must be specified either\nas literal variable name (e.g., \\code{column_name}) or as string\n(\\code{\"column_name\"}). \\code{name} will be ignored when a data frame is returned.}\n\n\\item{extract}{String, indicating which element will be extracted when \\code{select}\nmatches multiple variables. 
Can be \\code{\"all\"} (the default) to return all\nmatched variables, \\code{\"first\"} or \\code{\"last\"} to return the first or last match,\nor \\code{\"odd\"} and \\code{\"even\"} to return all odd-numbered or even-numbered\nmatches. Note that \\code{\"first\"} or \\code{\"last\"} return a vector (unless\n\\code{as_data_frame = TRUE}), while \\code{\"all\"} can return a vector (if only one\nmatch was found) \\emph{or} a data frame (for more than one match). Type safe\nreturn values are only possible when \\code{extract} is \\code{\"first\"} or \\code{\"last\"} (will\nalways return a vector) or when \\code{as_data_frame = TRUE} (always returns a\ndata frame).}\n\n\\item{as_data_frame}{Logical, if \\code{TRUE}, will always return a data frame,\neven if only one variable was matched. If \\code{FALSE}, either returns a vector\nor a data frame. See \\code{extract} for details.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA vector (or a data frame) containing the extracted element, or\n\\code{NULL} if no matching variable was found.\n}\n\\description{\n\\code{data_extract()} (or its alias \\code{extract()}) is similar to \\code{$}. 
It extracts\neither a single column or element from an object (e.g., a data frame, list),\nor multiple columns resp. elements.\n}\n\\details{\n\\code{data_extract()} can be used to select multiple variables or pull a\nsingle variable from a data frame. Thus, the return value is by default not\ntype safe - \\code{data_extract()} either returns a vector or a data frame.\n\\subsection{Extracting single variables (vectors)}{\nWhen \\code{select} is the name of a single column, or when select only matches\none column, a vector is returned. A single variable is also returned when\n\\code{extract} is either \\code{\"first\"} or \\code{\"last\"}. Setting \\code{as_data_frame} to \\code{TRUE}\noverrides this behaviour and \\emph{always} returns a data frame.\n}\n\\subsection{Extracting a data frame of variables}{\nWhen \\code{select} is a character vector containing more than one column name (or\na numeric vector with more than one valid column index), or when \\code{select}\nuses one of the supported select-helpers that match multiple columns, a\ndata frame is returned. Setting \\code{as_data_frame} to \\code{TRUE} \\emph{always} returns\na data frame.\n}\n}\n\\examples{\n# single variable\ndata_extract(mtcars, cyl, name = gear)\ndata_extract(mtcars, \"cyl\", name = gear)\ndata_extract(mtcars, -1, name = gear)\ndata_extract(mtcars, cyl, name = 0)\ndata_extract(mtcars, cyl, name = \"row.names\")\n\n# selecting multiple variables\nhead(data_extract(iris, starts_with(\"Sepal\")))\nhead(data_extract(iris, ends_with(\"Width\")))\nhead(data_extract(iris, 2:4))\n\n# select first of multiple variables\ndata_extract(iris, starts_with(\"Sepal\"), extract = \"first\")\n\n# select first of multiple variables, return as data frame\nhead(data_extract(iris, starts_with(\"Sepal\"), extract = \"first\", as_data_frame = TRUE))\n}\n"
  },
  {
    "path": "man/data_group.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_group.R\n\\name{data_group}\n\\alias{data_group}\n\\alias{data_ungroup}\n\\title{Create a grouped data frame}\n\\usage{\ndata_group(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\ndata_ungroup(data, verbose = TRUE, ...)\n}\n\\arguments{\n\\item{data}{A data frame}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{...}{Arguments passed down to other functions. Mostly not used yet.}\n}\n\\value{\nA grouped data frame, i.e. a data frame with additional information\nabout the grouping structure saved as attributes.\n}\n\\description{\nThis function is comparable to \\code{dplyr::group_by()}, but just\nfollowing the \\strong{datawizard} function design. \\code{data_ungroup()} removes the\ngrouping information from a grouped data frame.\n}\n\\examples{\n\\dontshow{if (requireNamespace(\"poorman\")) withAutoprint(\\{ # examplesIf}\ndata(efc)\nsuppressPackageStartupMessages(library(poorman, quietly = TRUE))\n\n# total mean\nefc \\%>\\%\n  summarize(mean_hours = mean(c12hour, na.rm = TRUE))\n\n# mean by educational level\nefc \\%>\\%\n  data_group(c172code) \\%>\\%\n  summarize(mean_hours = mean(c12hour, na.rm = TRUE))\n\\dontshow{\\}) # examplesIf}\n}\n"
  },
  {
    "path": "man/data_match.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_match.R\n\\name{data_match}\n\\alias{data_match}\n\\alias{data_filter}\n\\title{Return filtered or sliced data frame, or row indices}\n\\usage{\ndata_match(x, to, match = \"and\", return_indices = FALSE, remove_na = TRUE, ...)\n\ndata_filter(x, ...)\n}\n\\arguments{\n\\item{x}{A data frame.}\n\n\\item{to}{A data frame matching the specified conditions. Note that if\n\\code{match} is a value other than \\code{\"and\"}, the original row order might be\nchanged. See 'Details'.}\n\n\\item{match}{String, indicating with which logical operation matching\nconditions should be combined. Can be \\code{\"and\"} (or \\code{\"&\"}), \\code{\"or\"} (or \\code{\"|\"})\nor \\code{\"not\"} (or \\code{\"!\"}).}\n\n\\item{return_indices}{Logical, if \\code{TRUE}, return the vector of rows that\ncan be used to filter the original data frame. If \\code{FALSE} (default),\nreturns directly the filtered data frame instead of the row indices.}\n\n\\item{remove_na}{Logical, if \\code{TRUE}, missing values (\\code{NA}s) are removed before\nfiltering the data. This is the default behaviour, however, sometimes when\nrow indices are requested (i.e. \\code{return_indices=TRUE}), it might be useful\nto preserve \\code{NA} values, so returned row indices match the row indices of\nthe original data frame.}\n\n\\item{...}{A sequence of logical expressions indicating which rows to keep,\nor a numeric vector indicating the row indices of rows to keep. Can also be\na string representation of a logical expression (e.g. \\code{\"x > 4\"}), a\ncharacter vector (e.g. \\code{c(\"x > 4\", \"y == 2\")}) or a variable that contains\nthe string representation of a logical expression. 
These might be useful\nwhen used in packages to avoid defining undefined global variables.}\n}\n\\value{\nA filtered data frame, or the row indices that match the specified\nconfiguration.\n}\n\\description{\nReturn a filtered (or sliced) data frame or row indices of a data frame that\nmatch a specific condition. \\code{data_filter()} works like \\code{data_match()}, but works\nwith logical expressions or row indices of a data frame to specify matching\nconditions.\n}\n\\details{\nFor \\code{data_match()}, if \\code{match} is either \\code{\"or\"} or \\code{\"not\"}, the\noriginal row order from \\code{x} might be changed. If preserving row order is\nrequired, use \\code{data_filter()} instead.\n\n\\if{html}{\\out{<div class=\"sourceCode\">}}\\preformatted{# mimics subset() behaviour, preserving original row order\nhead(data_filter(mtcars[c(\"mpg\", \"vs\", \"am\")], vs == 0 | am == 1))\n#>                    mpg vs am\n#> Mazda RX4         21.0  0  1\n#> Mazda RX4 Wag     21.0  0  1\n#> Datsun 710        22.8  1  1\n#> Hornet Sportabout 18.7  0  0\n#> Duster 360        14.3  0  0\n#> Merc 450SE        16.4  0  0\n\n# re-sorting rows\nhead(data_match(mtcars[c(\"mpg\", \"vs\", \"am\")],\n                data.frame(vs = 0, am = 1),\n                match = \"or\"))\n#>                    mpg vs am\n#> Mazda RX4         21.0  0  1\n#> Mazda RX4 Wag     21.0  0  1\n#> Hornet Sportabout 18.7  0  0\n#> Duster 360        14.3  0  0\n#> Merc 450SE        16.4  0  0\n#> Merc 450SL        17.3  0  0\n}\\if{html}{\\out{</div>}}\n\nWhile \\code{data_match()} works with data frames to match conditions against,\n\\code{data_filter()} is basically a wrapper around \\verb{subset(subset = <filter>)}.\nHowever, unlike \\code{subset()}, it preserves label attributes and is useful when\nworking with labelled data.\n}\n\\examples{\ndata_match(mtcars, data.frame(vs = 0, am = 1))\ndata_match(mtcars, data.frame(vs = 0, am = c(0, 1)))\n\n# observations where \"vs\" is NOT 0 AND \"am\" is 
NOT 1\ndata_match(mtcars, data.frame(vs = 0, am = 1), match = \"not\")\n# equivalent to\ndata_filter(mtcars, vs != 0 & am != 1)\n\n# observations where EITHER \"vs\" is 0 OR \"am\" is 1\ndata_match(mtcars, data.frame(vs = 0, am = 1), match = \"or\")\n# equivalent to\ndata_filter(mtcars, vs == 0 | am == 1)\n\n# slice data frame by row indices\ndata_filter(mtcars, 5:10)\n\n# Define a custom function containing data_filter()\nmy_filter <- function(data, variable) {\n  data_filter(data, variable)\n}\nmy_filter(mtcars, \"cyl == 6\")\n\n# Pass complete filter-condition as string.\nmy_filter <- function(data, condition) {\n  data_filter(data, condition)\n}\nmy_filter(mtcars, \"am != 0\")\n\n# string can also be used directly as argument\ndata_filter(mtcars, \"am != 0\")\n\n# or as variable\nfl <- \"am != 0\"\ndata_filter(mtcars, fl)\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, 
\\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_merge.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_merge.R\n\\name{data_merge}\n\\alias{data_merge}\n\\alias{data_join}\n\\alias{data_merge.data.frame}\n\\alias{data_merge.list}\n\\title{Merge (join) two data frames, or a list of data frames}\n\\usage{\ndata_merge(x, ...)\n\ndata_join(x, ...)\n\n\\method{data_merge}{data.frame}(x, y, join = \"left\", by = NULL, id = NULL, verbose = TRUE, ...)\n\n\\method{data_merge}{list}(x, join = \"left\", by = NULL, id = NULL, verbose = TRUE, ...)\n}\n\\arguments{\n\\item{x, y}{A data frame to merge. \\code{x} may also be a list of data frames\nthat will be merged. Note that the list-method has no \\code{y} argument.}\n\n\\item{...}{Not used.}\n\n\\item{join}{Character vector, indicating the method of joining the data frames.\nCan be \\code{\"full\"}, \\code{\"left\"} (default), \\code{\"right\"}, \\code{\"inner\"}, \\code{\"anti\"}, \\code{\"semi\"}\nor \\code{\"bind\"}. See details below.}\n\n\\item{by}{Specifications of the columns used for merging.}\n\n\\item{id}{Optional name for ID column that will be created to indicate the\nsource data frames for appended rows. Only applies if \\code{join = \"bind\"}.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA merged data frame.\n}\n\\description{\nMerge (join) two data frames, or a list of data frames. However, unlike\nbase R's \\code{merge()}, \\code{data_merge()} offers a few more methods to join data\nframes, and it does not drop data frame nor column attributes.\n}\n\\section{Merging data frames}{\n\n\nMerging data frames is performed by adding rows (cases), columns\n(variables) or both from the source data frame (\\code{y}) to the target\ndata frame (\\code{x}). This usually requires one or more variables which\nare included in both data frames and that are used for merging, typically\nindicated with the \\code{by} argument. 
When \\code{by} contains a variable present\nin both data frames, cases are matched and filtered by identical values\nof \\code{by} in \\code{x} and \\code{y}.\n}\n\n\\section{Left- and right-joins}{\n\n\nLeft- and right joins usually don't add new rows (cases), but only new\ncolumns (variables) for existing cases in \\code{x}. For \\code{join = \"left\"} or\n\\code{join = \"right\"} to work, \\code{by} \\emph{must} indicate one or more columns that\nare included in both data frames. For \\code{join = \"left\"}, if \\code{by} is an\nidentifier variable, which is included in both \\code{x} and \\code{y}, all variables\nfrom \\code{y} are copied to \\code{x}, but only those cases from \\code{y} that have\nmatching values in their identifier variable in \\code{x} (i.e. all cases\nin \\code{x} that are also found in \\code{y} get the related values from the new\ncolumns in \\code{y}). If there is no match between identifiers in \\code{x} and \\code{y},\nthe copied variable from \\code{y} will get a \\code{NA} value for this particular\ncase. Other variables that occur both in \\code{x} and \\code{y}, but are not used\nas identifiers (with \\code{by}), will be renamed to avoid multiple identical\nvariable names. Cases in \\code{y} where values from the identifier have no\nmatch in \\code{x}'s identifier are removed. \\code{join = \"right\"} works in\na similar way as \\code{join = \"left\"}, just that only cases from \\code{x} that\nhave matching values in their identifier variable in \\code{y} are chosen.\n\nIn base R, these are equivalent to \\code{merge(x, y, all.x = TRUE)} and\n\\code{merge(x, y, all.y = TRUE)}.\n}\n\n\\section{Full joins}{\n\n\nFull joins copy all cases from \\code{y} to \\code{x}. For matching cases in both\ndata frames, values for new variables are copied from \\code{y} to \\code{x}. 
For\ncases in \\code{y} not present in \\code{x}, these will be added as new rows to \\code{x}.\nThus, full joins not only add new columns (variables), but also might\nadd new rows (cases).\n\nIn base R, this is equivalent to \\code{merge(x, y, all = TRUE)}.\n}\n\n\\section{Inner joins}{\n\n\nInner joins merge two data frames, however, only those rows (cases) are\nkept that are present in both data frames. Thus, inner joins usually\nadd new columns (variables), but also remove rows (cases) that only\noccur in one data frame.\n\nIn base R, this is equivalent to \\code{merge(x, y)}.\n}\n\n\\section{Binds}{\n\n\n\\code{join = \"bind\"} row-binds the complete second data frame \\code{y} to \\code{x}.\nUnlike simple \\code{rbind()}, which requires the same columns for both data\nframes, \\code{join = \"bind\"} will bind shared columns from \\code{y} to \\code{x}, and\nadd new columns from \\code{y} to \\code{x}.\n}\n\n\\examples{\n\nx <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 5:7, id = 1:3)\ny <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 2:4)\n\nx\ny\n\n# \"by\" will default to all shared columns, i.e. \"c\" and \"id\". new columns\n# \"d\" and \"e\" will be copied from \"y\" to \"x\", but there are only two cases\n# in \"x\" that have the same values for \"c\" and \"id\" in \"y\". 
only those cases\n# have values in the copied columns, the other case gets \"NA\".\ndata_merge(x, y, join = \"left\")\n\n# we change the id-value here\nx <- data.frame(a = 1:3, b = c(\"a\", \"b\", \"c\"), c = 5:7, id = 1:3)\ny <- data.frame(c = 6:8, d = c(\"f\", \"g\", \"h\"), e = 100:102, id = 3:5)\n\nx\ny\n\n# no cases in \"y\" have the same matching \"c\" and \"id\" as in \"x\", thus\n# copied variables from \"y\" to \"x\" copy no values, all get NA.\ndata_merge(x, y, join = \"left\")\n\n# one case in \"y\" has a match in \"id\" with \"x\", thus values for this\n# case from the remaining variables in \"y\" are copied to \"x\", all other\n# values (cases) in those remaining variables get NA\ndata_merge(x, y, join = \"left\", by = \"id\")\n\ndata(mtcars)\nx <- mtcars[1:5, 1:3]\ny <- mtcars[28:32, 4:6]\n\n# add ID common column\nx$id <- 1:5\ny$id <- 3:7\n\n# left-join, add new variables and copy values from y to x,\n# where \"id\" values match\ndata_merge(x, y)\n\n# right-join, add new variables and copy values from x to y,\n# where \"id\" values match\ndata_merge(x, y, join = \"right\")\n\n# full-join\ndata_merge(x, y, join = \"full\")\n\n\ndata(mtcars)\nx <- mtcars[1:5, 1:3]\ny <- mtcars[28:32, c(1, 4:5)]\n\n# add ID common column\nx$id <- 1:5\ny$id <- 3:7\n\n# left-join, no matching rows (because columns \"id\" and \"disp\" are used)\n# new variables get all NA values\ndata_merge(x, y)\n\n# one common value in \"mpg\", so one row from y is copied to x\ndata_merge(x, y, by = \"mpg\")\n\n# only keep rows with matching values in by-column\ndata_merge(x, y, join = \"semi\", by = \"mpg\")\n\n# only keep rows with non-matching values in by-column\ndata_merge(x, y, join = \"anti\", by = \"mpg\")\n\n# merge list of data frames. 
they can have different numbers of rows\nx <- mtcars[1:5, 1:3]\ny <- mtcars[28:31, 3:5]\nz <- mtcars[11:18, c(1, 3:4, 6:8)]\nx$id <- 1:5\ny$id <- 4:7\nz$id <- 3:10\ndata_merge(list(x, y, z), join = \"bind\", by = \"id\", id = \"source\")\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_modify.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_modify.R\n\\name{data_modify}\n\\alias{data_modify}\n\\alias{data_modify.data.frame}\n\\title{Create new variables in a data frame}\n\\usage{\ndata_modify(data, ...)\n\n\\method{data_modify}{data.frame}(data, ..., .if = NULL, .at = NULL, .modify = NULL)\n}\n\\arguments{\n\\item{data}{A data frame}\n\n\\item{...}{One or more expressions that define the new variable name and the\nvalues or recoding of those new variables. These expressions can be one of:\n\\itemize{\n\\item A sequence of named, literal expressions, where the left-hand side refers\nto the name of the new variable, while the right-hand side represent the\nvalues of the new variable. Example: \\code{Sepal.Width = center(Sepal.Width)}.\n\\item A vector of length 1 (which will be recycled to match the number of rows\nin the data), or of same length as the data.\n\\item A variable that contains a value to be used. Example:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{a <- \"abc\"\ndata_modify(iris, var_abc = a) # var_abc contains \"abc\"\n}\\if{html}{\\out{</div>}}\n\\item An expression can also be provided as string and wrapped in\n\\code{as_expr()}. Example:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{data_modify(iris, as_expr(\"Sepal.Width = center(Sepal.Width)\"))\n# or\na <- \"center(Sepal.Width)\"\ndata_modify(iris, Sepal.Width = as_expr(a))\n# or\na <- \"Sepal.Width = center(Sepal.Width)\"\ndata_modify(iris, as_expr(a))\n}\\if{html}{\\out{</div>}}\n\nNote that \\code{as_expr()} is no real function, which cannot be used outside\nof \\code{data_modify()}, and hence it is not exported nor documented. 
Rather,\nit is only used for internally processing expressions.\n\\item Using \\code{NULL} as right-hand side removes a variable from the data frame.\nExample: \\code{Petal.Width = NULL}.\n\\item For data frames (including grouped ones), the function \\code{n()} can be used to\ncount the number of observations and thereby, for instance, create index\nvalues by using \\code{id = 1:n()} or \\code{id = 3:(n()+2)} and similar. Note that,\nlike \\code{as_expr()}, \\code{n()} is also no true function and cannot be used outside\nof \\code{data_modify()}.\n}\n\nNote that newly created variables can be used in subsequent expressions,\nincluding \\code{.at} or \\code{.if}. See also 'Examples'.}\n\n\\item{.if}{A function that returns \\code{TRUE} for columns in the data frame where\n\\code{.if} applies. This argument is used in combination with the \\code{.modify} argument.\nNote that only one of \\code{.at} or \\code{.if} can be provided, but not both at the same\ntime. Newly created variables in \\code{...} can also be selected, see 'Examples'.}\n\n\\item{.at}{A character vector of variable names that should be modified. This\nargument is used in combination with the \\code{.modify} argument. Note that only one\nof \\code{.at} or \\code{.if} can be provided, but not both at the same time. Newly created\nvariables in \\code{...} can also be selected, see 'Examples'.}\n\n\\item{.modify}{A function that modifies the variables defined in \\code{.at} or \\code{.if}.\nThis argument is used in combination with either the \\code{.at} or the \\code{.if} argument.\nNote that the modified variable (i.e. the result from \\code{.modify}) must be either\nof length 1 or of same length as the input variable.}\n}\n\\description{\nCreate new variables or modify existing variables in a data frame. 
Unlike \\code{base::transform()}, \\code{data_modify()}\ncan be used on grouped data frames, and newly created variables can be directly\nused.\n}\n\\note{\n\\code{data_modify()} can also be used inside functions. However, it is\nrecommended to pass the recode-expression as character vector or list of\ncharacters.\n}\n\\examples{\ndata(efc)\nnew_efc <- data_modify(\n  efc,\n  c12hour_c = center(c12hour),\n  c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n  c12hour_z2 = standardize(c12hour)\n)\nhead(new_efc)\n\n# using strings instead of literal expressions\nnew_efc <- data_modify(\n  efc,\n  as_expr(\"c12hour_c = center(c12hour)\"),\n  as_expr(\"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\"),\n  as_expr(\"c12hour_z2 = standardize(c12hour)\")\n)\nhead(new_efc)\n\n# using a character vector, provided a variable\nxpr <- c(\n  \"c12hour_c = center(c12hour)\",\n  \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n  \"c12hour_z2 = standardize(c12hour)\"\n)\nnew_efc <- data_modify(efc, as_expr(xpr))\nhead(new_efc)\n\n# using character strings, provided as variable\nstand <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\nnew_efc <- data_modify(\n  efc,\n  c12hour_c = center(c12hour),\n  c12hour_z = as_expr(stand)\n)\nhead(new_efc)\n\n# attributes - in this case, value and variable labels - are preserved\nstr(new_efc)\n\n# using `paste()` to build a string-expression\nto_standardize <- c(\"Petal.Length\", \"Sepal.Length\")\nout <- data_modify(\n  iris,\n  as_expr(\n    paste0(to_standardize, \"_stand = standardize(\", to_standardize, \")\")\n  )\n)\nhead(out)\n\n# overwrite existing variable, remove old variable\nout <- data_modify(iris, Petal.Length = 1 / Sepal.Length, Sepal.Length = NULL)\nhead(out)\n\n# works on grouped data\ngrouped_efc <- data_group(efc, \"c172code\")\nnew_efc <- data_modify(\n  grouped_efc,\n  c12hour_c = center(c12hour),\n  c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n  c12hour_z2 = standardize(c12hour),\n  id = 
1:n()\n)\nhead(new_efc)\n\n# works from inside functions\nfoo1 <- function(data, ...) {\n  head(data_modify(data, ...))\n}\nfoo1(iris, SW_fraction = Sepal.Width / 10)\n# or\nfoo1(iris, as_expr(\"SW_fraction = Sepal.Width / 10\"))\n\n# also with string arguments, using `as_expr()`\nfoo2 <- function(data, modification) {\n  head(data_modify(data, as_expr(modification)))\n}\nfoo2(iris, \"SW_fraction = Sepal.Width / 10\")\n\n# modify at specific positions or if condition is met\nd <- iris[1:5, ]\ndata_modify(d, .at = \"Species\", .modify = as.numeric)\ndata_modify(d, .if = is.factor, .modify = as.numeric)\n\n# can be combined with dots\ndata_modify(d, new_length = Petal.Length * 2, .at = \"Species\", .modify = as.numeric)\n\n# new variables used in `.at` or `.if`\ndata_modify(\n  d,\n  new_length = Petal.Length * 2,\n  .at = c(\"Petal.Length\", \"new_length\"),\n  .modify = round\n)\n\n# combine \"extract_column_names()\" and \".at\" argument\nout <- data_modify(\n  d,\n  .at = extract_column_names(d, select = starts_with(\"Sepal\")),\n  .modify = as.factor\n)\n# \"Sepal.Length\" and \"Sepal.Width\" are now factors\nstr(out)\n\n}\n"
  },
  {
    "path": "man/data_partition.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_partition.R\n\\name{data_partition}\n\\alias{data_partition}\n\\title{Partition data}\n\\usage{\ndata_partition(\n  data,\n  proportion = 0.7,\n  by = NULL,\n  seed = NULL,\n  row_id = \".row_id\",\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{proportion}{Scalar (between 0 and 1) or numeric vector, indicating the\nproportion(s) of the training set(s). The sum of \\code{proportion} must not be\ngreater than 1. The remaining part will be used for the test set.}\n\n\\item{by}{A character vector indicating the name(s) of the column(s) used\nfor stratified partitioning.}\n\n\\item{seed}{A random number generator seed. Enter an integer (e.g. 123) so\nthat the random sampling will be the same each time you run the function.}\n\n\\item{row_id}{Character string, indicating the name of the column that\ncontains the row-id's.}\n\n\\item{verbose}{Toggle messages and warnings.}\n\n\\item{...}{Other arguments passed to or from other functions.}\n}\n\\value{\nA list of data frames. The list includes one training set per given\nproportion and the remaining data as test set. 
List elements of training\nsets are named after the given proportions (e.g., \\verb{$p_0.7}), the test set\nis named \\verb{$test}.\n}\n\\description{\nCreates data partitions (for instance, a training and a test set) based on a\ndata frame that can also be stratified (i.e., evenly spread a given factor)\nusing the \\code{by} argument.\n}\n\\examples{\ndata(iris)\nout <- data_partition(iris, proportion = 0.9)\nout$test\nnrow(out$p_0.9)\n\n# Stratify by group (equal proportions of each species)\nout <- data_partition(iris, proportion = 0.9, by = \"Species\")\nout$test\n\n# Create multiple partitions\nout <- data_partition(iris, proportion = c(0.3, 0.3))\nlapply(out, head)\n\n# Create multiple partitions, stratified by group - 30\\% equally sampled\n# from species in first training set, 50\\% in second training set and\n# remaining 20\\% equally sampled from each species in test set.\nout <- data_partition(iris, proportion = c(0.3, 0.5), by = \"Species\")\nlapply(out, function(i) table(i$Species))\n\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, 
\\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_peek.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_peek.R\n\\name{data_peek}\n\\alias{data_peek}\n\\alias{data_peek.data.frame}\n\\title{Peek at values and type of variables in a data frame}\n\\usage{\ndata_peek(x, ...)\n\n\\method{data_peek}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  width = NULL,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame.}\n\n\\item{...}{not used.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. 
\\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{width}{Maximum width of line length to display. If \\code{NULL}, width will\nbe determined using \\code{options()$width}.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA data frame with three columns, containing information about\nthe name, type and first values of the input data frame.\n}\n\\description{\nThis function creates a table of a data frame, showing all\ncolumn names, variable types and the first values (as many as fit into\nthe screen).\n}\n\\note{\nTo show only specific or a limited number of variables, use the\n\\code{select} argument, e.g. \\code{select = 1:5} to show only the first five variables.\n}\n\\examples{\ndata(efc)\ndata_peek(efc)\n# show variables two to four\ndata_peek(efc, select = 2:4)\n}\n"
  },
  {
    "path": "man/data_prefix_suffix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_addprefix.R\n\\name{data_addprefix}\n\\alias{data_addprefix}\n\\alias{data_addsuffix}\n\\title{Add a prefix or suffix to column names}\n\\usage{\ndata_addprefix(\n  data,\n  pattern,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\ndata_addsuffix(\n  data,\n  pattern,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{pattern}{A character string, which will be added as prefix or suffix\nto the column names.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. 
When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{...}{Other arguments passed to or from other functions.}\n}\n\\description{\nAdd a prefix or suffix to column names\n}\n\\examples{\n# Add prefix / suffix to all columns\nhead(data_addprefix(iris, \"NEW_\"))\nhead(data_addsuffix(iris, \"_OLD\"))\n\n}\n\\seealso{\n\\code{\\link[=data_rename]{data_rename()}} for more fine-grained column renaming.\n}\n"
  },
  {
    "path": "man/data_read.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_read.R, R/data_write.R\n\\name{data_read}\n\\alias{data_read}\n\\alias{data_write}\n\\title{Read (import) data files from various sources}\n\\usage{\ndata_read(\n  path,\n  path_catalog = NULL,\n  encoding = NULL,\n  convert_factors = TRUE,\n  password = NULL,\n  verbose = TRUE,\n  ...\n)\n\ndata_write(\n  data,\n  path,\n  delimiter = \",\",\n  convert_factors = FALSE,\n  save_labels = FALSE,\n  password = NULL,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{path}{Character string, the file path to the data file.}\n\n\\item{path_catalog}{Character string, path to the catalog file. Only relevant\nfor SAS data files.}\n\n\\item{encoding}{The character encoding used for the file. Usually not needed.}\n\n\\item{convert_factors}{If \\code{TRUE} (default), numeric variables, where all\nvalues have a value label, are assumed to be categorical and converted into\nfactors. If \\code{FALSE}, no variable types are guessed and no conversion of\nnumeric variables into factors will be performed. For \\code{data_read()}, this\nargument only applies to file types with \\emph{labelled data}, e.g. files from\nSPSS, SAS or Stata. See also section 'Differences to other packages'. For\n\\code{data_write()}, this argument only applies to the text (e.g. \\code{.txt} or\n\\code{.csv}) or spreadsheet file formats (like \\code{.xlsx}). Converting to factors\nmight be useful for these formats because labelled numeric variables are then\nconverted into factors and exported as character columns - else, value labels\nwould be lost and only numeric values are written to the file.}\n\n\\item{password}{Password for data encryption. If not \\code{NULL}, the data will be\nencrypted (for \\code{data_write()}) or decrypted (for \\code{data_read()}) using the\nprovided password. Encryption is currently only supported for R file formats\n(\\code{.rds}, \\code{.rda} and \\code{.rdata}). 
See the section \"Data encryption\" below for more\ninformation on the encryption method used.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{...}{Arguments passed to the related \\verb{read_*()} or \\verb{write_*()} functions.}\n\n\\item{data}{The data frame that should be written to a file.}\n\n\\item{delimiter}{For CSV-files, specifies the delimiter. Defaults to \\code{\",\"},\nbut in particular in European regions, \\code{\";\"} might be a useful alternative,\nespecially when exported CSV-files should be opened in Excel.}\n\n\\item{save_labels}{Only applies to CSV files. If \\code{TRUE}, value and variable\nlabels (if any) will be saved as additional CSV file. This file has the same\nfile name as the exported CSV file, but includes a \\code{\"_labels\"} suffix (i.e.\nwhen the file name is \\code{\"mydat.csv\"}, the additional file with value and\nvariable labels is named \\code{\"mydat_labels.csv\"}).}\n}\n\\value{\nA data frame.\n}\n\\description{\nThis function imports data from various file types. It is a small wrapper\naround \\code{haven::read_spss()}, \\code{haven::read_stata()}, \\code{haven::read_sas()},\n\\code{readxl::read_excel()} and \\code{data.table::fread()} resp. \\code{readr::read_delim()}\n(the latter if package \\strong{data.table} is not installed). Thus, supported file\ntypes for importing data are data files from SPSS, SAS or Stata, Excel files\nor text files (like '.csv' files). All other file types are passed to\n\\code{rio::import()}. \\code{data_write()} works in a similar way.\n}\n\\section{Supported file types}{\n\n\\itemize{\n\\item \\code{data_read()} is a wrapper around the \\strong{haven}, \\strong{data.table}, \\strong{readr},\n\\strong{readxl}, \\strong{nanoparquet} and \\strong{rio} packages. 
Currently supported file\ntypes are \\code{.txt}, \\code{.csv}, \\code{.xls}, \\code{.xlsx}, \\code{.sav}, \\code{.por}, \\code{.dta}, \\code{.sas},\n\\code{.rda}, \\code{.parquet}, \\code{.rdata}, and \\code{.rds} (and related files). All other file\ntypes are passed to \\code{rio::import()}.\n\\item \\code{data_write()} is a wrapper around \\strong{haven}, \\strong{readr}, \\strong{nanoparquet},\nand \\strong{rio} packages, and supports writing files into all formats supported\nby these packages.\n}\n}\n\n\\section{Compressed files (zip) and URLs}{\n\n\\code{data_read()} can also read the above mentioned files from URLs or from\ninside zip-compressed files. Thus, \\code{path} can also be a URL to a file like\n\\code{\"http://www.url.com/file.csv\"}. When \\code{path} points to a zip-compressed file,\nand there are multiple files inside the zip-archive, then the first supported\nfile is extracted and loaded.\n}\n\n\\section{General behaviour}{\n\n\\code{data_read()} detects the appropriate \\verb{read_*()} function based on the\nfile-extension of the data file. Thus, in most cases it should be enough to\nonly specify the \\code{path} argument. However, if more control is needed, all\narguments in \\code{...} are passed down to the related \\verb{read_*()} function. The\nsame applies to \\code{data_write()}, i.e. based on the file extension provided in\n\\code{path}, the appropriate \\verb{write_*()} function is used automatically.\n}\n\n\\section{SPSS specific behaviour}{\n\n\\code{data_read()} does \\emph{not} import user-defined (\"tagged\") \\code{NA} values from\nSPSS, i.e. argument \\code{user_na} is always set to \\code{FALSE} when importing SPSS\ndata with the \\strong{haven} package. Use \\code{convert_to_na()} to define missing\nvalues in the imported data, if necessary. Furthermore, \\code{data_write()}\ncompresses SPSS files by default. 
If this causes problems with (older) SPSS\nversions, use \\code{compress = \"none\"}, for example\n\\code{data_write(data, \"myfile.sav\", compress = \"none\")}.\n}\n\n\\section{Differences to other packages that read foreign data formats}{\n\n\\code{data_read()} is most comparable to \\code{rio::import()}. For data files from\nSPSS, SAS or Stata, which support labelled data, variables are converted into\ntheir most appropriate type. The major difference to \\code{rio::import()} is for\ndata files from SPSS, SAS, or Stata, i.e. file types that support\n\\emph{labelled data}. \\code{data_read()} automatically converts fully labelled numeric\nvariables into factors, where imported value labels will be set as factor\nlevels. If a numeric variable has \\emph{no} value labels or less value labels than\nvalues, it is not converted to factor. In this case, value labels are\npreserved as \\code{\"labels\"} attribute. Character vectors are preserved. Use\n\\code{convert_factors = FALSE} to remove the automatic conversion of numeric\nvariables to factors.\n}\n\n\\section{Data encryption}{\n\n\\code{data_read()} and \\code{data_write()} support data encryption for R file formats\n(\\code{.rds}, \\code{.rda} and \\code{.rdata}). To encrypt a file, provide a password to the\n\\code{password} argument in \\code{data_write()}. To decrypt the file, provide the same\npassword to \\code{data_read()}. The encryption is based on the \\strong{openssl} package\nand uses the AES-GCM algorithm (see \\code{?openssl::aes_gcm_encrypt}) with a\n256-bit key (see \\code{?openssl::sha256}). Thus, data can also be decrypted without\nrelying on the \\strong{datawizard} package, e.g. 
using following code:\n\n\\if{html}{\\out{<div class=\"sourceCode\">}}\\preformatted{encrypted_data <- readRDS(datafile)\nkey <- openssl::sha256(charToRaw(\"<password>\"))\nout <- openssl::aes_gcm_decrypt(encrypted_data, key = key)\ndecrypted_data <- unserialize(out)\n}\\if{html}{\\out{</div>}}\n\n\\strong{Warning:} Do not lose your \\code{password}, else you will not be able to\ndecrypt the data again!\n}\n\n"
  },
  {
    "path": "man/data_relocate.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_relocate.R, R/data_remove.R\n\\name{data_relocate}\n\\alias{data_relocate}\n\\alias{data_reorder}\n\\alias{data_remove}\n\\title{Relocate (reorder) columns of a data frame}\n\\usage{\ndata_relocate(\n  data,\n  select,\n  before = NULL,\n  after = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\ndata_reorder(\n  data,\n  select,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\ndata_remove(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{before, after}{Destination of columns. Supplying neither will move\ncolumns to the left-hand side; specifying both is an error. 
Can be a\ncharacter vector, indicating the name of the destination column, or a\nnumeric value, indicating the index number of the destination column.\nIf \\code{-1}, will be added before or after the last column.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{...}{Arguments passed down to other functions. Mostly not used yet.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n}\n\\value{\nA data frame with reordered columns.\n}\n\\description{\n\\code{data_relocate()} will reorder columns to specific positions, indicated by\n\\code{before} or \\code{after}. \\code{data_reorder()} will instead move selected columns to\nthe beginning of a data frame. Finally, \\code{data_remove()} removes columns\nfrom a data frame. 
All functions support select-helpers that allow flexible\nspecification of a search pattern to find matching columns, which should\nbe reordered or removed.\n}\n\\examples{\n# Reorder columns\nhead(data_relocate(iris, select = \"Species\", before = \"Sepal.Length\"))\nhead(data_relocate(iris, select = \"Species\", before = \"Sepal.Width\"))\nhead(data_relocate(iris, select = \"Sepal.Width\", after = \"Species\"))\n# which is same as\nhead(data_relocate(iris, select = \"Sepal.Width\", after = -1))\n\n# Reorder multiple columns\nhead(data_relocate(iris, select = c(\"Species\", \"Petal.Length\"), after = \"Sepal.Width\"))\n# which is same as\nhead(data_relocate(iris, select = c(\"Species\", \"Petal.Length\"), after = 2))\n\n# Reorder columns\nhead(data_reorder(iris, c(\"Species\", \"Sepal.Length\")))\n\n# Remove columns\nhead(data_remove(iris, \"Sepal.Length\"))\nhead(data_remove(iris, starts_with(\"Sepal\")))\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item 
Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_rename.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_rename.R\n\\name{data_rename}\n\\alias{data_rename}\n\\alias{data_rename_rows}\n\\title{Rename columns and variable names}\n\\usage{\ndata_rename(data, select = NULL, replacement = NULL, ...)\n\ndata_rename_rows(data, rows = NULL)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{replacement}{Character vector. Can be one of the following:\n\\itemize{\n\\item A character vector that indicates the new names of the columns selected\nin \\code{select}. \\code{select} and \\code{replacement} must be of the same length.\n\\item A string (i.e. character vector of length 1) with a \"glue\" styled\npattern. Currently supported tokens are:\n\\itemize{\n\\item \\code{{col}} which will be replaced by the column name, i.e. 
the\ncorresponding value in \\code{select}.\n\\item \\code{{n}} will be replaced by the number of the variable that is replaced.\n\\item \\code{{letter}} will be replaced by alphabetical letters in sequential\norder.\nIf more than 26 letters are required, letters are repeated, but have\nsequential numeric indices (e.g., \\code{a1} to \\code{z1}, followed by \\code{a2} to\n\\code{z2}).\n\\item Finally, the name of a user-defined object that is available in the\nenvironment can be used. Note that the object's name is not allowed to\nbe one of the pre-defined tokens, \\code{\"col\"}, \\code{\"n\"} and \\code{\"letter\"}.\n}\n\nAn example for the use of tokens is...\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{data_rename(\n  mtcars,\n  select = c(\"am\", \"vs\"),\n  replacement = \"new_name_from_\\{col\\}\"\n)\n}\\if{html}{\\out{</div>}}\n\n... which would return new column names \\code{new_name_from_am} and\n\\code{new_name_from_vs}. See 'Examples'.\n}\n\nIf \\code{select} is a named vector, \\code{replacement} is ignored.}\n\n\\item{...}{Other arguments passed to or from other functions.}\n\n\\item{rows}{Vector of row names.}\n}\n\\value{\nA modified data frame.\n}\n\\description{\nSafe and intuitive functions to rename variables or rows in\ndata frames. \\code{data_rename()} will rename column names, i.e. it facilitates\nrenaming variables. \\code{data_rename_rows()} is a convenient shortcut\nto add or rename row names of a data frame, but unlike \\code{row.names()}, its\ninput and output is a data frame, thus, integrating smoothly into a\npossible pipe-workflow.\n}\n\\details{\n\\code{select} can also be a named character vector. In this case, the names are\nused to rename the columns in the output data frame. If you have a named\nlist, use \\code{unlist()} to convert it to a named vector. 
See 'Examples'.\n}\n\\examples{\n# Rename columns\nhead(data_rename(iris, \"Sepal.Length\", \"length\"))\n\n# Use named vector to rename\nhead(data_rename(iris, c(length = \"Sepal.Length\", width = \"Sepal.Width\")))\n\n# Change all\nhead(data_rename(iris, replacement = paste0(\"Var\", 1:5)))\n\n# Use glue-styled patterns\nhead(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"formerly_{col}\"))\nhead(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"{col}_is_column_{n}\"))\nhead(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"new_{letter}\"))\n\n# User-defined glue-styled patterns from objects in environment\nx <- c(\"hi\", \"there\", \"!\")\nhead(data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"col_{x}\"))\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: 
\\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_replicate.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_replicate.R\n\\name{data_replicate}\n\\alias{data_replicate}\n\\title{Expand (i.e. replicate rows) a data frame}\n\\usage{\ndata_replicate(\n  data,\n  expand = NULL,\n  select = NULL,\n  exclude = NULL,\n  remove_na = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{expand}{The name of the column that contains the counts of replications\nfor each row. Can also be a numeric value, indicating the position of that\ncolumn. Note that the variable indicated by \\code{expand} must be an integer vector.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{remove_na}{Logical. If \\code{TRUE}, missing values in the column\nprovided in \\code{expand} are removed from the data frame. 
If \\code{FALSE} and \\code{expand}\ncontains missing values, the function will throw an error.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{...}{Currently not used.}\n}\n\\value{\nA dataframe with each row replicated as many times as defined in \\code{expand}.\n}\n\\description{\nExpand a data frame by replicating rows based on another variable that\ncontains the counts of replications per row.\n}\n\\examples{\ndata(mtcars)\ndata_replicate(head(mtcars), \"carb\")\n}\n"
  },
  {
    "path": "man/data_restoretype.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_restoretype.R\n\\name{data_restoretype}\n\\alias{data_restoretype}\n\\title{Restore the type of columns according to a reference data frame}\n\\usage{\ndata_restoretype(data, reference = NULL, ...)\n}\n\\arguments{\n\\item{data}{A data frame for which to restore the column types.}\n\n\\item{reference}{A reference data frame from which to find the correct\ncolumn types. If \\code{NULL}, each column is converted to numeric if it doesn't\ngenerate \\code{NA}s. For example, \\code{c(\"1\", \"2\")} can be converted to numeric but not\n\\code{c(\"Sepal.Length\")}.}\n\n\\item{...}{Currently not used.}\n}\n\\value{\nA data frame with columns whose types have been restored based on the\nreference data frame.\n}\n\\description{\nRestore the type of columns according to a reference data frame\n}\n\\examples{\ndata <- data.frame(\n  Sepal.Length = c(\"1\", \"3\", \"2\"),\n  Species = c(\"setosa\", \"versicolor\", \"setosa\"),\n  New = c(\"1\", \"3\", \"4\")\n)\n\nfixed <- data_restoretype(data, reference = iris)\nsummary(fixed)\n}\n"
  },
  {
    "path": "man/data_rotate.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_rotate.R\n\\name{data_rotate}\n\\alias{data_rotate}\n\\alias{data_transpose}\n\\title{Rotate a data frame}\n\\usage{\ndata_rotate(data, rownames = NULL, colnames = FALSE, verbose = TRUE)\n\ndata_transpose(data, rownames = NULL, colnames = FALSE, verbose = TRUE)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{rownames}{Character vector (optional). If not \\code{NULL}, the data frame's\nrownames will be added as (first) column to the output, with \\code{rownames}\nbeing the name of this column.}\n\n\\item{colnames}{Logical or character vector (optional). If \\code{TRUE}, the values\nof the first column in \\code{x} will be used as column names in the rotated data\nframe. If a character vector, values from that column are used as column\nnames.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA (rotated) data frame.\n}\n\\description{\nThis function rotates a data frame, i.e. columns become rows and vice versa.\nIt's the equivalent of using \\code{t()} but restores the \\code{data.frame} class,\npreserves attributes and prints a warning if the data type is\nmodified (see example).\n}\n\\examples{\nx <- mtcars[1:3, 1:4]\n\nx\n\ndata_rotate(x)\ndata_rotate(x, rownames = \"property\")\n\n# use values in 1. 
column as column name\ndata_rotate(x, colnames = TRUE)\ndata_rotate(x, rownames = \"property\", colnames = TRUE)\n\n# use either first column or specific column for column names\nx <- data.frame(a = 1:5, b = 11:15, c = 21:25)\ndata_rotate(x, colnames = TRUE)\ndata_rotate(x, colnames = \"c\")\n\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_seek.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_seek.R\n\\name{data_seek}\n\\alias{data_seek}\n\\title{Find variables by their names, variable or value labels}\n\\usage{\ndata_seek(data, pattern, seek = c(\"names\", \"labels\"), fuzzy = FALSE)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{pattern}{Character string (regular expression) to be matched in \\code{data}.\nMay also be a character vector of length > 1. \\code{pattern} is searched for in\ncolumn names, variable label and value labels attributes, or factor levels of\nvariables in \\code{data}.}\n\n\\item{seek}{Character vector, indicating where \\code{pattern} is sought. Use one\nor more of the following options:\n\\itemize{\n\\item \\code{\"names\"}: Searches in column names. \\code{\"column_names\"} and \\code{\"columns\"} are\naliases for \\code{\"names\"}.\n\\item \\code{\"labels\"}: Searches in variable labels. Only applies when a \\code{label} attribute\nis set for a variable.\n\\item \\code{\"values\"}: Searches in value labels or factor levels. Only applies when a\n\\code{labels} attribute is set for a variable, or if a variable is a factor.\n\\code{\"levels\"} is an alias for \\code{\"values\"}.\n\\item \\code{\"all\"}: Searches in all of the above.\n}}\n\n\\item{fuzzy}{Logical. If \\code{TRUE}, \"fuzzy matching\" (partial and close distance\nmatching) will be used to find \\code{pattern}.}\n}\n\\value{\nA data frame with three columns: the column index, the column name\nand - if available - the variable label of all matched variables in \\code{data}.\n}\n\\description{\nThis functions seeks variables in a data frame, based on patterns\nthat either match the variable name (column name), variable labels, value labels\nor factor levels. Matching variable and value labels only works for \"labelled\"\ndata, i.e. 
when the variables either have a \\code{label} attribute or \\code{labels}\nattribute.\n\n\\code{data_seek()} is particularly useful for larger data frames with labelled\ndata - finding the correct variable name can be a challenge. This function\nhelps to find the required variables, when only certain patterns of variable\nnames or labels are known.\n}\n\\examples{\n# seek variables with \"Length\" in variable name or labels\ndata_seek(iris, \"Length\")\n\n# seek variables with \"dependency\" in names or labels\n# column \"e42dep\" has a label-attribute \"elder's dependency\"\ndata(efc)\ndata_seek(efc, \"dependency\")\n\n# \"female\" only appears as value label attribute - default search is in\n# variable names and labels only, so no match\ndata_seek(efc, \"female\")\n# when we seek in all sources, we find the variable \"e16sex\"\ndata_seek(efc, \"female\", seek = \"all\")\n\n# typo, no match\ndata_seek(iris, \"Lenght\")\n# typo, fuzzy match\ndata_seek(iris, \"Lenght\", fuzzy = TRUE)\n}\n"
  },
  {
    "path": "man/data_separate.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_separate.R\n\\name{data_separate}\n\\alias{data_separate}\n\\title{Separate single variable into multiple variables}\n\\usage{\ndata_separate(\n  data,\n  select = NULL,\n  new_columns = NULL,\n  separator = \"[^[:alnum:]]+\",\n  guess_columns = NULL,\n  merge_multiple = FALSE,\n  merge_separator = \"\",\n  fill = \"right\",\n  extra = \"drop_right\",\n  convert_na = TRUE,\n  exclude = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{new_columns}{The names of the new columns, as character vector. If\nmore than one variable was selected (in \\code{select}), the new names are prefixed\nwith the name of the original column. \\code{new_columns} can also be a list of\n(named) character vectors when multiple variables should be separated. See\n'Examples'.}\n\n\\item{separator}{Separator between columns. 
Can be a character vector, which\nis then treated as regular expression, or a numeric vector that indicates at\nwhich positions the string values will be split.}\n\n\\item{guess_columns}{If \\code{new_columns} is not given, the required number of\nnew columns is guessed based on the results of value splitting. For example,\nif a variable is split into three new columns, this will be considered as\nthe required number of new columns, and columns are named \\code{\"split_1\"},\n\\code{\"split_2\"} and \\code{\"split_3\"}. When values from a variable are split into\ndifferent amount of new columns, the \\code{guess_column} can be either \\code{\"mode\"}\n(number of new columns is based on the most common number of splits), \\code{\"min\"}\nor \\code{\"max\"} to use the minimum resp. maximum number of possible splits as\nrequired number of columns.}\n\n\\item{merge_multiple}{Logical, if \\code{TRUE} and more than one variable is selected\nfor separating, new columns can be merged. Value pairs of all split variables\nare merged.}\n\n\\item{merge_separator}{Separator string when \\code{merge_multiple = TRUE}. Defines\nthe string that is used to merge values together.}\n\n\\item{fill}{How to deal with values that return fewer new columns after\nsplitting? Can be \\code{\"left\"} (fill missing columns from the left with \\code{NA}),\n\\code{\"right\"} (fill missing columns from the right with \\code{NA}) or \\code{\"value_left\"}\nor \\code{\"value_right\"} to fill missing columns from left or right with the\nleft-most or right-most values.}\n\n\\item{extra}{How to deal with values that return too many new columns after\nsplitting? 
Can be \\code{\"drop_left\"} or \\code{\"drop_right\"} to drop the left-most or\nright-most values, or \\code{\"merge_left\"} or \\code{\"merge_right\"} to merge the left-\nor right-most value together, and keeping all remaining values as is.}\n\n\\item{convert_na}{Logical, if \\code{TRUE}, character \\code{\"NA\"} values are converted\ninto real \\code{NA} values.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical, if \\code{FALSE} (default), removes original columns that\nwere separated. If \\code{TRUE}, all columns are preserved and the new columns are\nappended to the data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{...}{Currently not used.}\n}\n\\value{\nA data frame with the newly created variable(s), or - when \\code{append = TRUE} -\n\\code{data} including new variables.\n}\n\\description{\nSeparates a single variable into multiple new variables.\n}\n\\examples{\n# simple case\nd <- data.frame(\n  x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n  stringsAsFactors = FALSE\n)\nd\ndata_separate(d, new_columns = c(\"a\", \"b\", \"c\"))\n\n# guess number of columns\nd <- data.frame(\n  x = c(\"1.a.6\", NA, \"2.b.6.7\", \"3.c\", \"x.y.z\"),\n  stringsAsFactors = FALSE\n)\nd\ndata_separate(d, guess_columns = \"mode\")\n\ndata_separate(d, guess_columns = \"max\")\n\n# drop left-most column\ndata_separate(d, guess_columns = \"mode\", extra = \"drop_left\")\n\n# merge right-most column\ndata_separate(d, guess_columns = \"mode\", extra = \"merge_right\")\n\n# fill columns with fewer values with left-most values\ndata_separate(d, guess_columns = \"mode\", fill = \"value_left\")\n\n# fill and merge\ndata_separate(\n  d,\n  guess_columns = \"mode\",\n  fill = \"value_left\",\n  extra = \"merge_right\"\n)\n\n# multiple columns to split\nd <- data.frame(\n  x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n  y = c(\"x.y.z\", \"10.11.12\", \"m.n.o\"),\n  stringsAsFactors = FALSE\n)\nd\n# split two columns, default column names\ndata_separate(d, guess_columns = \"mode\")\n\n# split into new named columns, repeating column names\ndata_separate(d, new_columns = c(\"a\", \"b\", \"c\"))\n\n# split selected variable new columns\ndata_separate(d, select = \"y\", new_columns = c(\"a\", \"b\", \"c\"))\n\n# merge multiple split columns\ndata_separate(\n  d,\n  new_columns = c(\"a\", \"b\", \"c\"),\n  merge_multiple = TRUE\n)\n\n# 
merge multiple split columns\ndata_separate(\n  d,\n  new_columns = c(\"a\", \"b\", \"c\"),\n  merge_multiple = TRUE,\n  merge_separator = \"-\"\n)\n\n# separate multiple columns, give proper column names\nd_sep <- data.frame(\n  x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n  y = c(\"m.n.99.22\", \"77.f.g.34\", \"44.9\", NA),\n  stringsAsFactors = FALSE\n)\n\ndata_separate(\n  d_sep,\n  select = c(\"x\", \"y\"),\n  new_columns = list(\n    x = c(\"A\", \"B\", \"C\"), # separate \"x\" into three columns\n    y = c(\"EE\", \"FF\", \"GG\", \"HH\") # separate \"y\" into four columns\n  ),\n  verbose = FALSE\n)\n}\n\\seealso{\n\\code{\\link[=data_unite]{data_unite()}}\n}\n"
  },
  {
    "path": "man/data_summary.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_summary.R\n\\name{data_summary}\n\\alias{data_summary}\n\\alias{data_summary.data.frame}\n\\title{Summarize data}\n\\usage{\ndata_summary(x, ...)\n\n\\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE, suffix = NULL)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame.}\n\n\\item{...}{One or more named expressions that define the new variable name\nand the function to compute the summary statistic. Example:\n\\code{mean_sepal_width = mean(Sepal.Width)}. The expression can also be provided\nas a character string, e.g. \\code{\"mean_sepal_width = mean(Sepal.Width)\"}. The\nsummary function \\code{n()} can be used to count the number of observations.}\n\n\\item{by}{Optional character string, indicating the names of one or more\nvariables in the data frame. If supplied, the data will be split by these\nvariables and summary statistics will be computed for each group.}\n\n\\item{remove_na}{Logical. If \\code{TRUE}, missing values are omitted from the\ngrouping variable. If \\code{FALSE} (default), missing values are included as a\nlevel in the grouping variable.}\n\n\\item{suffix}{Optional, suffixes to be added to the new variable names,\nespecially useful when a function returns several values (e.g. \\code{quantile()}).\nCan be:\n\\itemize{\n\\item a character vector: all expressions in \\code{...} must return the same number\nof values as elements in \\code{suffix}.\n\\item a list of named character vectors: the names of elements in \\code{suffix} must\nmatch the names of the expressions. It is also allowed to specify suffixes\nfor selected expressions only.\n}\n\nThe new column names are a combination of the left-hand side (i.e.,\nthe name) of the expression and the related suffixes. 
If \\code{suffix = NULL} (the\ndefault), and a summary expression returns multiple values, either the names\nof the returned values (if any) or automatically numbered suffixes such as\n\\verb{_1}, \\verb{_2}, etc. are used. See 'Examples'.}\n}\n\\value{\nA data frame with the requested summary statistics.\n}\n\\description{\nThis function can be used to compute summary statistics for a\ndata frame or a matrix.\n}\n\\examples{\ndata(iris)\ndata_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\ndata_summary(\n  iris,\n  MW = mean(Sepal.Width),\n  SD = sd(Sepal.Width),\n  by = \"Species\"\n)\n\n# same as\nd <- data_group(iris, \"Species\")\ndata_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\n\n# multiple groups\ndata(mtcars)\ndata_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c(\"am\", \"gear\"))\n\n# expressions can also be supplied as character strings\ndata_summary(mtcars, \"MW = mean(mpg)\", \"SD = sd(mpg)\", by = c(\"am\", \"gear\"))\n\n# count observations within groups\ndata_summary(mtcars, observations = n(), by = c(\"am\", \"gear\"))\n\n# first and last observations of \"mpg\" within groups\ndata_summary(\n  mtcars,\n  first = mpg[1],\n  last = mpg[length(mpg)],\n  by = c(\"am\", \"gear\")\n)\n\n# allow more than one-column-summaries for expressions\nd <- data.frame(\n  x = rnorm(100, 1, 1),\n  y = rnorm(100, 2, 2),\n  groups = rep(1:4, each = 25)\n)\n\n# since we have multiple columns for one expression, the names of the\n# returned summary results are used as suffix by default\ndata_summary(\n  d,\n  quant_x = quantile(x, c(0.25, 0.75)),\n  mean_x = mean(x),\n  quant_y = quantile(y, c(0.25, 0.5, 0.75))\n)\n\n# if a summary function, like `fivenum()`, returns no named vector, suffixes\n# are automatically numbered\ndata_summary(\n  d,\n  quant_x = quantile(x, c(0.25, 0.75)),\n  mean_x = mean(x),\n  fivenum_y = fivenum(y)\n)\n\n# specify column suffix for expressions, matching by names\ndata_summary(\n  d,\n  quant_x = quantile(x, 
c(0.25, 0.75)),\n  mean_x = mean(x),\n  quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n  suffix = list(quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n)\n\n# name multiple expression suffixes, grouped by variable\ndata_summary(\n  d,\n  quant_x = quantile(x, c(0.25, 0.75)),\n  mean_x = mean(x),\n  quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n  suffix = list(quant_x = c(\"Q1\", \"Q3\"), quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\")),\n  by = \"groups\"\n)\n\n}\n"
  },
  {
    "path": "man/data_tabulate.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_tabulate.R\n\\name{data_tabulate}\n\\alias{data_tabulate}\n\\alias{data_tabulate.default}\n\\alias{data_tabulate.data.frame}\n\\alias{print.datawizard_table}\n\\alias{display.datawizard_table}\n\\title{Create frequency and crosstables of variables}\n\\usage{\ndata_tabulate(x, ...)\n\n\\method{data_tabulate}{default}(\n  x,\n  by = NULL,\n  drop_levels = FALSE,\n  weights = NULL,\n  remove_na = FALSE,\n  proportions = NULL,\n  name = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{data_tabulate}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  by = NULL,\n  drop_levels = FALSE,\n  weights = NULL,\n  remove_na = FALSE,\n  proportions = NULL,\n  collapse = FALSE,\n  verbose = TRUE,\n  ...\n)\n\n\\method{print}{datawizard_table}(x, big_mark = NULL, ...)\n\n\\method{display}{datawizard_table}(object, big_mark = NULL, format = \"markdown\", ...)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, a vector or factor.}\n\n\\item{...}{not used.}\n\n\\item{by}{Optional vector or factor. If supplied, a crosstable is created.\nIf \\code{x} is a data frame, \\code{by} can also be a character string indicating the\nname of a variable in \\code{x}.}\n\n\\item{drop_levels}{Logical, if \\code{FALSE}, factor levels that do not occur in\nthe data are included in the table (with frequency of zero), else unused\nfactor levels are dropped from the frequency table.}\n\n\\item{weights}{Optional numeric vector of weights. Must be of the same length\nas \\code{x}. If \\code{weights} is supplied, weighted frequencies are calculated.}\n\n\\item{remove_na}{Logical, if \\code{FALSE}, missing values are included in the\nfrequency or crosstable, else missing values are omitted. 
Note that the\ndefault for the \\code{as.table()} method is \\code{remove_na = TRUE}, so that missing\nvalues are not included in the returned table, which makes more sense for\npost-processing of the table, e.g. using \\code{chisq.test()}.}\n\n\\item{proportions}{Optional character string, indicating the type of\npercentages to be calculated. Only applies to crosstables, i.e. when \\code{by} is\nnot \\code{NULL}. Can be \\code{\"row\"} (row percentages), \\code{\"column\"} (column percentages)\nor \\code{\"full\"} (to calculate relative frequencies for the full table).}\n\n\\item{name}{Optional character string, which includes the name that is used\nfor printing.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. 
When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{collapse}{Logical, if \\code{TRUE} collapses multiple tables into one larger\ntable for printing. This affects only printing, not the returned object.}\n\n\\item{big_mark}{Optional character string, indicating the big mark that is\nused for large numbers. If \\code{NULL} (default), a big mark is added automatically for\nlarge numbers (i.e. numbers with more than 5 digits). If you want to remove\nthe big mark, set \\code{big_mark = \"\"}.}\n\n\\item{object}{An object returned by \\code{data_tabulate()}.}\n\n\\item{format}{String, indicating the output format. Can be \\code{\"markdown\"},\n\\code{\"html\"}, or \\code{\"tt\"}. \\code{format = \"html\"} creates an HTML table using the \\emph{gt}\npackage. \\code{format = \"tt\"} creates a \\code{tinytable} object, which is either\nprinted as markdown or HTML table, depending on the environment. See\n\\code{\\link[insight:export_table]{insight::export_table()}} for details.}\n}\n\\value{\nA data frame, or a list of data frames, with one frequency table\nas data frame per variable.\n}\n\\description{\nThis function creates frequency or crosstables of variables,\nincluding the number of levels/values as well as the distribution of raw,\nvalid and cumulative percentages. For crosstables, row, column and cell\npercentages can be calculated.\n}\n\\details{\nThere is an \\code{as.data.frame()} method, to return the frequency tables as a\ndata frame. 
The structure of the returned object is a nested data frame,\nwhere the first column contains the name of the variable for which frequencies\nwere calculated, and the second column is a list column that contains the\nfrequency tables as data frame. See \\link{as.table.datawizard_table}.\n\nThere is also an \\code{as.table()} method, which returns a table object with the\nfrequencies of the variable. This is useful for further statistical analysis,\ne.g. for using \\code{chisq.test()} on the frequency table. See\n\\link{as.table.datawizard_table}.\n}\n\\note{\nThere are \\code{print_html()} and \\code{print_md()} methods available for printing\nfrequency or crosstables in HTML and markdown format, e.g.\n\\code{print_html(data_tabulate(x))}. The \\code{print()} method for text outputs passes\narguments in \\code{...} to \\code{\\link[insight:export_table]{insight::export_table()}}.\n}\n\\section{Crosstables}{\n\nIf \\code{by} is supplied, a crosstable is created. The crosstable includes \\verb{<NA>}\n(missing) values by default. The first column indicates values of \\code{x}, the\nfirst row indicates values of \\code{by} (including missing values). The last row\nand column contain the total frequencies for each row and column, respectively.\nSetting \\code{remove_na = TRUE} will omit missing values from the crosstable.\nSetting \\code{proportions} to \\code{\"row\"} or \\code{\"column\"} will add row or column\npercentages. 
Setting \\code{proportions} to \\code{\"full\"} will add relative frequencies\nfor the full table.\n}\n\n\\examples{\n\\dontshow{if (requireNamespace(\"poorman\")) withAutoprint(\\{ # examplesIf}\n# frequency tables -------\n# ------------------------\ndata(efc)\n\n# vector/factor\ndata_tabulate(efc$c172code)\n\n# drop missing values\ndata_tabulate(efc$c172code, remove_na = TRUE)\n\n# data frame\ndata_tabulate(efc, c(\"e42dep\", \"c172code\"))\n\n# grouped data frame\nsuppressPackageStartupMessages(library(poorman, quietly = TRUE))\nefc \\%>\\%\n  group_by(c172code) \\%>\\%\n  data_tabulate(\"e16sex\")\n\n# collapse tables\nefc \\%>\\%\n  group_by(c172code) \\%>\\%\n  data_tabulate(\"e16sex\", collapse = TRUE)\n\n# for larger N's (> 100000), a big mark is automatically added\nset.seed(123)\nx <- sample(1:3, 1e6, TRUE)\ndata_tabulate(x, name = \"Large Number\")\n\n# to remove the big mark, use \"print(..., big_mark = \"\")\"\nprint(data_tabulate(x), big_mark = \"\")\n\n# weighted frequencies\nset.seed(123)\nefc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\ndata_tabulate(efc$e42dep, weights = efc$weights)\n\n# crosstables ------\n# ------------------\n\n# add some missing values\nset.seed(123)\nefc$e16sex[sample.int(nrow(efc), 5)] <- NA\n\ndata_tabulate(efc, \"c172code\", by = \"e16sex\")\n\n# add row and column percentages\ndata_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"row\")\ndata_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\")\n\n# omit missing values\ndata_tabulate(\n  efc$c172code,\n  by = efc$e16sex,\n  proportions = \"column\",\n  remove_na = TRUE\n)\n\n# round percentages\nout <- data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\")\nprint(out, digits = 0)\n\\dontshow{\\}) # examplesIf}\n}\n\\seealso{\n\\link{as.prop.table}\n}\n"
  },
  {
    "path": "man/data_to_long.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_to_long.R\n\\name{data_to_long}\n\\alias{data_to_long}\n\\alias{reshape_longer}\n\\title{Reshape (pivot) data from wide to long}\n\\usage{\ndata_to_long(\n  data,\n  select = \"all\",\n  names_to = \"name\",\n  names_prefix = NULL,\n  names_sep = NULL,\n  names_pattern = NULL,\n  values_to = \"value\",\n  values_drop_na = FALSE,\n  rows_to = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  ...,\n  cols\n)\n\nreshape_longer(\n  data,\n  select = \"all\",\n  names_to = \"name\",\n  names_prefix = NULL,\n  names_sep = NULL,\n  names_pattern = NULL,\n  values_to = \"value\",\n  values_drop_na = FALSE,\n  rows_to = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  ...,\n  cols\n)\n}\n\\arguments{\n\\item{data}{A data frame to convert to long format, so that it has more\nrows and fewer columns after the operation.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{names_to}{The name of the new column (variable) that will contain the\n\\emph{names} from columns in \\code{select} as values, to identify the source of the\nvalues. 
\\code{names_to} can be a character vector with more than one column name,\nin which case \\code{names_sep} or \\code{names_pattern} must be provided in order to\nidentify which parts of the column names go into newly created columns.\nSee also 'Examples'.}\n\n\\item{names_prefix}{A regular expression used to remove matching text from\nthe start of each variable name.}\n\n\\item{names_sep, names_pattern}{If \\code{names_to} contains multiple values, this\nargument controls how the column name is broken up. \\code{names_pattern} takes a\nregular expression containing matching groups, i.e. \"()\".}\n\n\\item{values_to}{The name of the new column that will contain the \\emph{values} of\nthe columns in \\code{select}.}\n\n\\item{values_drop_na}{If \\code{TRUE}, will drop rows that contain only \\code{NA} in the\n\\code{values_to} column. This effectively converts explicit missing values to\nimplicit missing values, and should generally be used only when missing values\nin data were created by its structure.}\n\n\\item{rows_to}{The name of the column that will contain the row names or row\nnumbers from the original data. If \\code{NULL}, will be removed.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{...}{Currently not used.}\n\n\\item{cols}{Identical to \\code{select}. This argument is here to ensure compatibility\nwith \\code{tidyr::pivot_longer()}. If both \\code{select} and \\code{cols} are provided, \\code{cols}\nis used.}\n}\n\\value{\nIf a tibble was provided as input, \\code{reshape_longer()} also returns a\ntibble. Otherwise, it returns a data frame.\n}\n\\description{\nThis function \"lengthens\" data, increasing the number of rows and decreasing\nthe number of columns. This is a dependency-free base-R equivalent of\n\\code{tidyr::pivot_longer()}.\n}\n\\details{\nReshaping data into long format usually means that the input data frame is\nin \\emph{wide} format, where multiple measurements taken on the same subject are\nstored in multiple columns (variables). The long format stores the same\ninformation in a single column, with each measurement per subject stored in\na separate row. 
The values of all variables that are not in \\code{select} will\nbe repeated.\n\nThe necessary information for \\code{data_to_long()} is:\n\\itemize{\n\\item The columns that contain the repeated measurements (\\code{select}).\n\\item The name of the newly created column that will contain the names of the\ncolumns in \\code{select} (\\code{names_to}), to identify the source of the values.\n\\code{names_to} can also be a character vector with more than one column name,\nin which case \\code{names_sep} or \\code{names_pattern} must be provided to specify\nwhich parts of the column names go into the newly created columns.\n\\item The name of the newly created column that contains the values of the\ncolumns in \\code{select} (\\code{values_to}).\n}\n\nIn other words: repeated measurements that are spread across several columns\nwill be gathered into a single column (\\code{values_to}), with the original column\nnames, that identify the source of the gathered values, stored in one or more\nnew columns (\\code{names_to}).\n}\n\\examples{\n\\dontshow{if (all(insight::check_if_installed(c(\"psych\", \"tidyr\"), quietly = TRUE))) withAutoprint(\\{ # examplesIf}\nwide_data <- setNames(\n  data.frame(replicate(2, rnorm(8))),\n  c(\"Time1\", \"Time2\")\n)\nwide_data$ID <- 1:8\nwide_data\n\n# Default behaviour (equivalent to tidyr::pivot_longer(wide_data, cols = 1:3))\n# probably doesn't make much sense to mix \"time\" and \"id\"\ndata_to_long(wide_data)\n\n# Customizing the names\ndata_to_long(\n  wide_data,\n  select = c(\"Time1\", \"Time2\"),\n  names_to = \"Timepoint\",\n  values_to = \"Score\"\n)\n\n# Reshape multiple columns into long format.\nmydat <- data.frame(\n  age = c(20, 30, 40),\n  sex = c(\"Female\", \"Male\", \"Male\"),\n  score_t1 = c(30, 35, 32),\n  score_t2 = c(33, 34, 37),\n  score_t3 = c(36, 35, 38),\n  speed_t1 = c(2, 3, 1),\n  speed_t2 = c(3, 4, 5),\n  speed_t3 = c(1, 8, 6)\n)\n# The column names are split into two columns: \"type\" and \"time\". 
The\n# pattern for splitting column names is provided in `names_pattern`. Values\n# of all \"score_*\" and \"speed_*\" columns are gathered into a single column\n# named \"count\".\ndata_to_long(\n  mydat,\n  select = 3:8,\n  names_to = c(\"type\", \"time\"),\n  names_pattern = \"(score|speed)_t(\\\\\\\\d+)\",\n  values_to = \"count\"\n)\n\n# Full example\n# ------------------\ndata <- psych::bfi # Wide format with one row per participant's personality test\n\n# Pivot long format\nvery_long_data <- data_to_long(data,\n  select = regex(\"\\\\\\\\d\"), # Select all columns that contain a digit\n  names_to = \"Item\",\n  values_to = \"Score\",\n  rows_to = \"Participant\"\n)\nhead(very_long_data)\n\neven_longer_data <- data_to_long(\n  tidyr::who,\n  select = new_sp_m014:newrel_f65,\n  names_to = c(\"diagnosis\", \"gender\", \"age\"),\n  names_pattern = \"new_?(.*)_(.)(.*)\",\n  values_to = \"count\"\n)\nhead(even_longer_data)\n\\dontshow{\\}) # examplesIf}\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, 
\\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_to_wide.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_to_wide.R\n\\name{data_to_wide}\n\\alias{data_to_wide}\n\\alias{reshape_wider}\n\\title{Reshape (pivot) data from long to wide}\n\\usage{\ndata_to_wide(\n  data,\n  id_cols = NULL,\n  values_from = \"Value\",\n  names_from = \"Name\",\n  names_sep = \"_\",\n  names_prefix = \"\",\n  names_glue = NULL,\n  values_fill = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\nreshape_wider(\n  data,\n  id_cols = NULL,\n  values_from = \"Value\",\n  names_from = \"Name\",\n  names_sep = \"_\",\n  names_prefix = \"\",\n  names_glue = NULL,\n  values_fill = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame to convert to wide format, so that it has more\ncolumns and fewer rows post-widening than pre-widening.}\n\n\\item{id_cols}{The name of the column that identifies the rows in the data\nby which observations are grouped and the gathered data is spread into new\ncolumns. Usually, this is a variable containing an ID for observations that\nhave been repeatedly measured. If \\code{NULL}, it will use all remaining columns\nthat are not in \\code{names_from} or \\code{values_from} as ID columns. \\code{id_cols} can\nalso be a character vector with more than one name of identifier columns. See\nalso 'Details' and 'Examples'.}\n\n\\item{values_from}{The name of the columns in the original data that contains\nthe values used to fill the new columns created in the widened data. Can also\nbe one of the selection helpers (see argument \\code{select} in \\code{\\link[=data_select]{data_select()}}).}\n\n\\item{names_from}{The name of the column in the original data whose values\nwill be used for naming the new columns created in the widened data. 
Each\nunique value in this column will become the name of one of these new columns.\nIn case \\code{names_prefix} is provided, column names will be concatenated with\nthe string given in \\code{names_prefix}. If \\code{values_from} specifies more than one\nvariable that should be widened, the new column names are a combination of\nthe old column names in \\code{values_from} and the \\emph{values} from \\code{names_from}, to\navoid duplicate column names.}\n\n\\item{names_sep}{If \\code{names_from} or \\code{values_from} contains multiple variables,\nthis will be used to join their values together into a single string to use\nas a column name.}\n\n\\item{names_prefix}{String added to the start of every variable name. This is\nparticularly useful if \\code{names_from} is a numeric vector and you want to create\nsyntactic variable names.}\n\n\\item{names_glue}{Instead of \\code{names_sep} and \\code{names_prefix}, you can supply a\n\\href{https://glue.tidyverse.org/index.html}{glue specification} that uses the\n\\code{names_from} columns to create custom column names. Note that the only\ndelimiters supported by \\code{names_glue} are curly brackets, \\verb{\\{} and \\verb{\\}}.}\n\n\\item{values_fill}{Defunct argument, which has no function anymore. Will be\nremoved in future versions.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{...}{Not used for now.}\n}\n\\value{\nIf a tibble was provided as input, \\code{data_to_wide()} also returns a\ntibble. Otherwise, it returns a data frame.\n}\n\\description{\nThis function \"widens\" data, increasing the number of columns and decreasing\nthe number of rows. This is a dependency-free base-R equivalent of\n\\code{tidyr::pivot_wider()}.\n}\n\\details{\nReshaping data into wide format usually means that the input data frame is\nin \\emph{long} format, where multiple measurements taken on the same subject are\nstored in multiple rows. The wide format stores the same information in a\nsingle row, with each measurement stored in a separate column. Thus, the\nnecessary information for \\code{data_to_wide()} is:\n\\itemize{\n\\item The name of the column(s) that identify the groups or repeated measurements\n(\\code{id_cols}).\n\\item The name of the column whose \\emph{values} will become the new column names\n(\\code{names_from}). Since these values may not necessarily reflect appropriate\ncolumn names, you can use \\code{names_prefix} to add a prefix to each newly\ncreated column name.\n\\item The name of the column(s) that contain the values (\\code{values_from}) for the\nnew columns that are created by \\code{names_from}.\n}\n\nIn other words: repeated measurements, as indicated by \\code{id_cols}, that are\nsaved into the column \\code{values_from} will be spread into new columns, which\nwill be named after the values in \\code{names_from}. 
See also 'Examples'.\n}\n\\examples{\n\\dontshow{if (requireNamespace(\"lme4\", quietly = TRUE)) withAutoprint(\\{ # examplesIf}\ndata_long <- read.table(header = TRUE, text = \"\n subject sex condition measurement\n       1   M   control         7.9\n       1   M     cond1        12.3\n       1   M     cond2        10.7\n       2   F   control         6.3\n       2   F     cond1        10.6\n       2   F     cond2        11.1\n       3   F   control         9.5\n       3   F     cond1        13.1\n       3   F     cond2        13.8\n       4   M   control        11.5\n       4   M     cond1        13.4\n       4   M     cond2        12.9\")\n\n# converting long data into wide format\ndata_to_wide(\n  data_long,\n  id_cols = \"subject\",\n  names_from = \"condition\",\n  values_from = \"measurement\"\n)\n\n# converting long data into wide format with custom column names\ndata_to_wide(\n  data_long,\n  id_cols = \"subject\",\n  names_from = \"condition\",\n  values_from = \"measurement\",\n  names_prefix = \"Var.\",\n  names_sep = \".\"\n)\n\n# converting long data into wide format, combining multiple columns\nproduction <- expand.grid(\n  product = c(\"A\", \"B\"),\n  country = c(\"AI\", \"EI\"),\n  year = 2000:2014\n)\nproduction <- data_filter(production, (product == \"A\" & country == \"AI\") | product == \"B\")\nproduction$production <- rnorm(nrow(production))\n\ndata_to_wide(\n  production,\n  names_from = c(\"product\", \"country\"),\n  values_from = \"production\",\n  names_glue = \"prod_{product}_{country}\"\n)\n\n# reshaping multiple long columns into wide format. 
to avoid duplicate\n# column names, new names are a combination of the old column names in\n# `values_from` and the values from `names_from`\ndata_long <- read.table(header = TRUE, text = \"\nsubject_id time score anxiety test\n         1    1    10       5   NA\n         1    2    NA       7   NA\n         2    1    15       6   NA\n         2    2    12      NA   NA\n         3    1    18       8   NA\n         5    2    11       4   NA\n         4    1    NA       5   NA\n         4    2    14      NA   NA\")\n\ndata_to_wide(\n  data_long,\n  id_cols = \"subject_id\",\n  names_from = \"time\",\n  values_from = c(\"score\", \"anxiety\", \"test\")\n)\n\n# using the \"sleepstudy\" dataset\ndata(sleepstudy, package = \"lme4\")\n\n# the sleepstudy data contains repeated measurements of average reaction\n# times for each subjects over multiple days, in a sleep deprivation study.\n# It is in long-format, i.e. each row corresponds to a single measurement.\n# The variable \"Days\" contains the timepoint of the measurement, and\n# \"Reaction\" contains the measurement itself. 
Converting this data to wide\n# format will create a new column for each day, with the reaction time as the\n# value.\nhead(sleepstudy)\n\ndata_to_wide(\n  sleepstudy,\n  id_cols = \"Subject\",\n  names_from = \"Days\",\n  values_from = \"Reaction\"\n)\n\n# clearer column names\ndata_to_wide(\n  sleepstudy,\n  id_cols = \"Subject\",\n  names_from = \"Days\",\n  values_from = \"Reaction\",\n  names_prefix = \"Reaction_Day_\"\n)\n\n# For unequal group sizes, missing information is filled with NA\nd <- subset(sleepstudy, Days \\%in\\% c(0, 1, 2, 3, 4))[c(1:9, 11:13, 16:17, 21), ]\n\n# long format, different number of \"Subjects\"\nd\n\ndata_to_wide(\n  d,\n  id_cols = \"Subject\",\n  names_from = \"Days\",\n  values_from = \"Reaction\",\n  names_prefix = \"Reaction_Day_\"\n)\n\\dontshow{\\}) # examplesIf}\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, 
\\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/data_unique.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_unique.R\n\\name{data_unique}\n\\alias{data_unique}\n\\title{Keep only one row from all with duplicated IDs}\n\\usage{\ndata_unique(\n  data,\n  select = NULL,\n  keep = \"best\",\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{keep}{The method to be used for duplicate selection, either \"best\"\n(the default), \"first\", or \"last\".}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA data frame, containing only the chosen duplicates.\n}\n\\description{\nFrom all rows with at least one duplicated ID,\nkeep only one. Methods for selecting the duplicated row are\neither the first duplicate, the last duplicate, or the \"best\"\nduplicate (default), based on the duplicate with the smallest\nnumber of \\code{NA}. In case of ties, it picks the first\nduplicate, as it is the one most likely to be valid and\nauthentic, given practice effects.\n\nContrarily to \\code{dplyr::distinct()}, \\code{data_unique()} keeps all columns.\n}\n\\examples{\ndf1 <- data.frame(\n  id = c(1, 2, 3, 1, 3),\n  item1 = c(NA, 1, 1, 2, 3),\n  item2 = c(NA, 1, 1, 2, 3),\n  item3 = c(NA, 1, 1, 2, 3)\n)\n\ndata_unique(df1, select = \"id\")\n}\n\\seealso{\n\\code{\\link[=data_duplicated]{data_duplicated()}}\n}\n"
  },
  {
    "path": "man/data_unite.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_unite.R\n\\name{data_unite}\n\\alias{data_unite}\n\\title{Unite (\"merge\") multiple variables}\n\\usage{\ndata_unite(\n  data,\n  new_column = NULL,\n  select = NULL,\n  exclude = NULL,\n  separator = \"_\",\n  append = FALSE,\n  remove_na = FALSE,\n  ignore_case = FALSE,\n  verbose = TRUE,\n  regex = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{new_column}{The name of the new column, as a string.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. 
\\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{separator}{A character to use between values.}\n\n\\item{append}{Logical, if \\code{FALSE} (default), removes original columns that\nwere united. If \\code{TRUE}, all columns are preserved and the new column is\nappended to the data frame.}\n\n\\item{remove_na}{Logical, if \\code{TRUE}, missing values (\\code{NA}) are not included\nin the united values. 
If \\code{FALSE}, missing values are represented as \\code{\"NA\"}\nin the united values.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{...}{Currently not used.}\n}\n\\value{\n\\code{data}, with a newly created variable.\n}\n\\description{\nMerge values of multiple variables per observation into one new variable.\n}\n\\examples{\nd <- data.frame(\n  x = 1:3,\n  y = letters[1:3],\n  z = 6:8\n)\nd\ndata_unite(d, new_column = \"xyz\")\ndata_unite(d, new_column = \"xyz\", append = TRUE)\ndata_unite(d, new_column = \"xyz\", select = c(\"x\", \"z\"))\ndata_unite(d, new_column = \"xyz\", select = c(\"x\", \"z\"), append = TRUE)\n}\n\\seealso{\n\\code{\\link[=data_separate]{data_separate()}}\n}\n"
  },
  {
    "path": "man/datawizard-package.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/datawizard-package.R\n\\docType{package}\n\\name{datawizard-package}\n\\alias{datawizard-package}\n\\alias{datawizard}\n\\title{datawizard: Easy Data Wrangling and Statistical Transformations}\n\\description{\nA lightweight package to assist in key steps involved in any data analysis\nworkflow:\n\\itemize{\n\\item wrangling the raw data to get it in the needed form,\n\\item applying preprocessing steps and statistical transformations, and\n\\item compute statistical summaries of data properties and distributions.\n}\n\nIt is also the data wrangling backend for packages in 'easystats' ecosystem.\nReference: Patil et al. (2022) \\doi{10.21105/joss.04684}.\n}\n\\details{\n\\code{datawizard}\n}\n\\seealso{\nUseful links:\n\\itemize{\n  \\item \\url{https://easystats.github.io/datawizard/}\n  \\item Report bugs at \\url{https://github.com/easystats/datawizard/issues}\n}\n\n}\n\\author{\n\\strong{Maintainer}: Etienne Bacher \\email{etienne.bacher@protonmail.com} (\\href{https://orcid.org/0000-0002-9271-5075}{ORCID})\n\nAuthors:\n\\itemize{\n  \\item Indrajeet Patil \\email{patilindrajeet.science@gmail.com} (\\href{https://orcid.org/0000-0003-1995-6531}{ORCID})\n  \\item Dominique Makowski \\email{dom.makowski@gmail.com} (\\href{https://orcid.org/0000-0001-5375-9967}{ORCID})\n  \\item Daniel Lüdecke \\email{d.luedecke@uke.de} (\\href{https://orcid.org/0000-0002-8895-3206}{ORCID})\n  \\item Mattan S. Ben-Shachar \\email{matanshm@post.bgu.ac.il} (\\href{https://orcid.org/0000-0002-4287-4801}{ORCID})\n  \\item Brenton M. Wiernik \\email{brenton@wiernik.org} (\\href{https://orcid.org/0000-0001-9560-6336}{ORCID})\n}\n\nOther contributors:\n\\itemize{\n  \\item Rémi Thériault \\email{remi.theriault@mail.mcgill.ca} (\\href{https://orcid.org/0000-0003-4315-6788}{ORCID}) [contributor]\n  \\item Thomas J. 
Faulkenberry \\email{faulkenberry@tarleton.edu} [reviewer]\n  \\item Robert Garrett \\email{rcg4@illinois.edu} [reviewer]\n}\n\n}\n\\keyword{internal}\n"
  },
  {
    "path": "man/demean.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/demean.R\n\\name{demean}\n\\alias{demean}\n\\alias{degroup}\n\\alias{detrend}\n\\title{Compute group-meaned and de-meaned variables}\n\\usage{\ndemean(\n  x,\n  select,\n  by,\n  nested = FALSE,\n  suffix_demean = \"_within\",\n  suffix_groupmean = \"_between\",\n  append = TRUE,\n  add_attributes = TRUE,\n  verbose = TRUE\n)\n\ndegroup(\n  x,\n  select,\n  by,\n  nested = FALSE,\n  center = \"mean\",\n  suffix_demean = \"_within\",\n  suffix_groupmean = \"_between\",\n  append = TRUE,\n  add_attributes = TRUE,\n  verbose = TRUE\n)\n\ndetrend(\n  x,\n  select,\n  by,\n  nested = FALSE,\n  center = \"mean\",\n  suffix_demean = \"_within\",\n  suffix_groupmean = \"_between\",\n  append = TRUE,\n  add_attributes = TRUE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{x}{A data frame.}\n\n\\item{select}{Character vector (or formula) with names of variables to select\nthat should be group- and de-meaned.}\n\n\\item{by}{Character vector (or formula) with the name of the variable that\nindicates the group- or cluster-ID. For cross-classified or nested designs,\n\\code{by} can also identify two or more variables as group- or cluster-IDs. If\nthe data is nested and should be treated as such, set \\code{nested = TRUE}. Else,\nif \\code{by} defines two or more variables and \\code{nested = FALSE}, a cross-classified\ndesign is assumed. 
Note that \\code{demean()} and \\code{degroup()} can't handle a mix\nof nested and cross-classified designs in one model.\n\nFor nested designs, \\code{by} can be:\n\\itemize{\n\\item a character vector with the name of the variable that indicates the\nlevels, ordered from \\emph{highest} level to \\emph{lowest} (e.g.\n\\code{by = c(\"L4\", \"L3\", \"L2\")}.\n\\item a character vector with variable names in the format \\code{by = \"L4/L3/L2\"},\nwhere the levels are separated by \\code{/}.\n}\n\nSee also section \\emph{De-meaning for cross-classified designs} and\n\\emph{De-meaning for nested designs} below.}\n\n\\item{nested}{Logical, if \\code{TRUE}, the data is treated as nested. If \\code{FALSE},\nthe data is treated as cross-classified. Only applies if \\code{by} contains more\nthan one variable.}\n\n\\item{suffix_demean, suffix_groupmean}{String value, will be appended to the\nnames of the group-meaned and de-meaned variables of \\code{x}. By default,\nde-meaned variables will be suffixed with \\code{\"_within\"} and\ngrouped-meaned variables with \\code{\"_between\"}.}\n\n\\item{append}{Logical, if \\code{TRUE} (default), the group- and de-meaned\nvariables will be appended (column bind) to the original data \\code{x},\nthus returning both the original and the de-/group-meaned variables.}\n\n\\item{add_attributes}{Logical, if \\code{TRUE}, the returned variables gain\nattributes to indicate the within- and between-effects. This is only\nrelevant when printing \\code{model_parameters()} - in such cases, the\nwithin- and between-effects are printed in separated blocks.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{center}{Method for centering. 
\\code{demean()} always performs\nmean-centering, while \\code{degroup()} can use \\code{center = \"median\"} or\n\\code{center = \"mode\"} for median- or mode-centering, and also \\code{\"min\"}\nor \\code{\"max\"}.}\n}\n\\value{\nA data frame with the group-/de-meaned variables, which get the suffix\n\\code{\"_between\"} (for the group-meaned variable) and \\code{\"_within\"} (for the\nde-meaned variable) by default. For cross-classified or nested designs,\nthe name pattern of the group-meaned variables is the name of the centered\nvariable followed by the name of the variable that indicates the related\ngrouping level, e.g. \\code{predictor_L3_between} and \\code{predictor_L2_between}.\n}\n\\description{\n\\code{demean()} computes group- and de-meaned versions of a variable that can be\nused in regression analysis to model the between- and within-subject effect\n(person-mean centering or centering within clusters). \\code{degroup()} is more\ngeneric in terms of the centering-operation. While \\code{demean()} always uses\nmean-centering, \\code{degroup()} can also use the mode or median for centering.\n}\n\\section{Heterogeneity Bias}{\n\n\nMixed models include different levels of sources of variability, i.e.\nerror terms at each level. When macro-indicators (or level-2 predictors,\nor higher-level units, or more general: \\emph{group-level predictors that\n\\strong{vary} within and across groups}) are included as fixed effects (i.e.\ntreated as covariate at level-1), the variance that is left unaccounted for\nthis covariate will be absorbed into the error terms of level-1 and level-2\n(\\emph{Bafumi and Gelman 2006; Gelman and Hill 2007, Chapter 12.6.}):\n\"Such covariates contain two parts: one that is specific to the higher-level\nentity that does not vary between occasions, and one that represents the\ndifference between occasions, within higher-level entities\" (\\emph{Bell et al. 
2015}).\nHence, the error terms will be correlated with the covariate, which violates\none of the assumptions of mixed models (iid, independent and identically\ndistributed error terms). This bias is also called the \\emph{heterogeneity bias}\n(\\emph{Bell et al. 2015}). To resolve this problem, level-2 predictors used as\n(level-1) covariates should be separated into their \"within\" and \"between\"\neffects by \"de-meaning\" and \"group-meaning\": After demeaning time-varying\npredictors, \"at the higher level, the mean term is no longer constrained by\nLevel 1 effects, so it is free to account for all the higher-level variance\nassociated with that variable\" (\\emph{Bell et al. 2015}).\n}\n\n\\section{Panel data and correlating fixed and group effects}{\n\n\n\\code{demean()} is intended to create group- and de-meaned variables for panel\nregression models (fixed effects models), or for complex\nrandom-effect-within-between models (see \\emph{Bell et al. 2015, 2018}), where\ngroup-effects (random effects) and fixed effects correlate (see\n\\emph{Bafumi and Gelman 2006}). This can happen, for instance, when analyzing\npanel data, which can lead to \\emph{Heterogeneity Bias}. To control for correlating\npredictors and group effects, it is recommended to include the group-meaned\nand de-meaned version of \\emph{time-varying covariates} (and group-meaned version\nof \\emph{time-invariant covariates} that are on a higher level, e.g. level-2\npredictors) in the model. By this, one can fit complex multilevel models for\npanel data, including time-varying predictors, time-invariant predictors and\nrandom effects.\n}\n\n\\section{Why mixed models are preferred over fixed effects models}{\n\n\nA mixed models approach can model the causes of endogeneity explicitly\nby including the (separated) within- and between-effects of time-varying\nfixed effects and including time-constant fixed effects. 
Furthermore,\nmixed models also include random effects, thus a mixed models approach\nis superior to classic fixed-effects models, which lack information of\nvariation in the group-effects or between-subject effects. Furthermore,\nfixed effects regression cannot include random slopes, which means that\nfixed effects regressions are neglecting \"cross-cluster differences in the\neffects of lower-level controls (which) reduces the precision of estimated\ncontext effects, resulting in unnecessarily wide confidence intervals and\nlow statistical power\" (\\emph{Heisig et al. 2017}).\n}\n\n\\section{Terminology}{\n\n\nThe group-meaned variable is simply the mean of an independent variable\nwithin each group (or id-level or cluster) represented by \\code{by}. It represents\nthe cluster-mean of an independent variable. The regression coefficient of a\ngroup-meaned variable is the \\emph{between-subject-effect}. The de-meaned variable\nis then the centered version of the group-meaned variable. De-meaning is\nsometimes also called person-mean centering or centering within clusters.\nThe regression coefficient of a de-meaned variable represents the\n\\emph{within-subject-effect}.\n}\n\n\\section{De-meaning with continuous predictors}{\n\n\nFor continuous time-varying predictors, the recommendation is to include\nboth their de-meaned and group-meaned versions as fixed effects, but not\nthe raw (untransformed) time-varying predictors themselves. The de-meaned\npredictor should also be included as random effect (random slope). In\nregression models, the coefficient of the de-meaned predictors indicates\nthe within-subject effect, while the coefficient of the group-meaned\npredictor indicates the between-subject effect.\n}\n\n\\section{De-meaning with binary predictors}{\n\n\nFor binary time-varying predictors, there are two recommendations. 
First\nis to include the raw (untransformed) binary predictor as fixed effect\nonly and the \\emph{de-meaned} variable as random effect (random slope).\nThe alternative would be to add the de-meaned version(s) of binary\ntime-varying covariates as additional fixed effect as well (instead of\nadding it as random slope). Centering time-varying binary variables to\nobtain within-effects (level 1) isn't necessary. They have a sensible\ninterpretation when left in the typical 0/1 format (\\emph{Hoffmann 2015,\nchapter 8-2.I}). \\code{demean()} will thus coerce categorical time-varying\npredictors to numeric to compute the de- and group-meaned versions for\nthese variables, where the raw (untransformed) binary predictor and the\nde-meaned version should be added to the model.\n}\n\n\\section{De-meaning of factors with more than 2 levels}{\n\n\nFactors with more than two levels are demeaned in two ways: first, these\nare also converted to numeric and de-meaned; second, dummy variables\nare created (binary, with 0/1 coding for each level) and these binary\ndummy-variables are de-meaned in the same way (as described above).\nPackages like \\strong{panelr} internally convert factors to dummies before\ndemeaning, so this behaviour can be mimicked here.\n}\n\n\\section{De-meaning interaction terms}{\n\n\nThere are multiple ways to deal with interaction terms of within- and\nbetween-effects.\n\\itemize{\n\\item A classical approach is to simply use the product term of the de-meaned\nvariables (i.e. introducing the de-meaned variables as interaction term\nin the model formula, e.g. \\code{y ~ x_within * time_within}). This approach,\nhowever, might be subject to bias (see \\emph{Giesselmann & Schmidt-Catran 2020}).\n\\item Another option is to first calculate the product term and then apply the\nde-meaning to it. 
This approach produces an estimator \"that reflects\nunit-level differences of interacted variables whose moderators vary\nwithin units\", which is desirable if \emph{no} within interaction of\ntwo time-dependent variables is required. This is what \\code{demean()} does\ninternally when \\code{select} contains interaction terms.\n\\item A third option, when the interaction should result in a genuine within\nestimator, is to \"double de-mean\" the interaction terms\n(\\emph{Giesselmann & Schmidt-Catran 2018}), however, this is currently\nnot supported by \\code{demean()}. If this is required, the \\code{wbm()}\nfunction from the \\strong{panelr} package should be used.\n}\n\nTo de-mean interaction terms for within-between models, simply specify\nthe term as interaction for the \\code{select}-argument, e.g. \\code{select = \"a*b\"}\n(see 'Examples').\n}\n\n\\section{De-meaning for cross-classified designs}{\n\n\n\\code{demean()} can handle cross-classified designs, where the data has two or\nmore groups at the higher (i.e. second) level. In such cases, the\n\\code{by}-argument can identify two or more variables that represent the\ncross-classified group- or cluster-IDs. The de-meaned variables for\ncross-classified designs are simply subtracting all group means from each\nindividual value, i.e. \\emph{fully cluster-mean-centering} (see \\emph{Guo et al. 2024}\nfor details). Note that de-meaning for cross-classified designs is \\emph{not}\nequivalent to de-meaning of nested data structures from models with three or\nmore levels. Set \\code{nested = TRUE} to explicitly assume a nested design. For\ncross-classified designs, de-meaning is supposed to work for models like\n\\code{y ~ x + (1|level3) + (1|level2)}, but \\emph{not} for models like\n\\code{y ~ x + (1|level3/level2)}. 
Note that \\code{demean()} and \\code{degroup()} can't\nhandle a mix of nested and cross-classified designs in one model.\n}\n\n\\section{De-meaning for nested designs}{\n\n\n\\emph{Brincks et al. (2017)} have suggested an algorithm to center variables for\nnested designs, which is implemented in \\code{demean()}. For nested designs, set\n\\code{nested = TRUE} \\emph{and} specify the variables that indicate the different\nlevels in descending order in the \\code{by} argument. E.g.,\n\\verb{by = c(\"level4\", \"level3\", \"level2\")} assumes a model like\n\\code{y ~ x + (1|level4/level3/level2)}. An alternative notation for the\n\\code{by}-argument would be \\code{by = \"level4/level3/level2\"}, similar to the\nformula notation.\n}\n\n\\section{Analysing panel data with mixed models using lme4}{\n\n\nA description of how to translate the formulas described in \\emph{Bell et al. 2018}\ninto R using \\code{lmer()} from \\strong{lme4} can be found in\n\\href{https://easystats.github.io/parameters/articles/demean.html}{this vignette}.\n}\n\n\\examples{\n\ndata(iris)\niris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID\niris$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable\n\nx <- demean(iris, select = c(\"Sepal.Length\", \"Petal.Length\"), by = \"ID\")\nhead(x)\n\nx <- demean(iris, select = c(\"Sepal.Length\", \"binary\", \"Species\"), by = \"ID\")\nhead(x)\n\n\n# demean interaction term x*y\ndat <- data.frame(\n  a = c(1, 2, 3, 4, 1, 2, 3, 4),\n  x = c(4, 3, 3, 4, 1, 2, 1, 2),\n  y = c(1, 2, 1, 2, 4, 3, 2, 1),\n  ID = c(1, 2, 3, 1, 2, 3, 1, 2)\n)\ndemean(dat, select = c(\"a\", \"x*y\"), by = \"ID\")\n\n# or in formula-notation\ndemean(dat, select = ~ a + x * y, by = ~ID)\n\n}\n\\references{\n\\itemize{\n\\item Bafumi J, Gelman A. 2006. Fitting Multilevel Models When Predictors\nand Group Effects Correlate. In. Philadelphia, PA: Annual meeting of the\nAmerican Political Science Association.\n\\item Bell A, Fairbrother M, Jones K. 2019. 
Fixed and Random Effects\nModels: Making an Informed Choice. Quality & Quantity (53); 1051-1074\n\\item Bell A, Jones K. 2015. Explaining Fixed Effects: Random Effects\nModeling of Time-Series Cross-Sectional and Panel Data. Political Science\nResearch and Methods, 3(1), 133–153.\n\\item Brincks, A. M., Enders, C. K., Llabre, M. M., Bulotsky-Shearer, R. J.,\nPrado, G., and Feaster, D. J. (2017). Centering Predictor Variables in\nThree-Level Contextual Models. Multivariate Behavioral Research, 52(2),\n149–163. https://doi.org/10.1080/00273171.2016.1256753\n\\item Gelman A, Hill J. 2007. Data Analysis Using Regression and\nMultilevel/Hierarchical Models. Analytical Methods for Social Research.\nCambridge, New York: Cambridge University Press\n\\item Giesselmann M, Schmidt-Catran, AW. 2020. Interactions in fixed\neffects regression models. Sociological Methods & Research, 1–28.\nhttps://doi.org/10.1177/0049124120914934\n\\item Guo Y, Dhaliwal J, Rights JD. 2024. Disaggregating level-specific effects\nin cross-classified multilevel models. Behavior Research Methods, 56(4),\n3023–3057.\n\\item Heisig JP, Schaeffer M, Giesecke J. 2017. The Costs of Simplicity:\nWhy Multilevel Models May Benefit from Accounting for Cross-Cluster\nDifferences in the Effects of Controls. American Sociological Review 82\n(4): 796–827.\n\\item Hoffman L. 2015. Longitudinal analysis: modeling within-person\nfluctuation and change. New York: Routledge\n}\n}\n\\seealso{\nIf grand-mean centering (instead of centering within-clusters)\nis required, see \\code{\\link[=center]{center()}}. See \\code{\\link[performance:check_group_variation]{performance::check_group_variation()}}\nto check for heterogeneity bias.\n}\n"
  },
  {
    "path": "man/describe_distribution.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/describe_distribution.R\n\\name{describe_distribution}\n\\alias{describe_distribution}\n\\alias{describe_distribution.numeric}\n\\alias{describe_distribution.factor}\n\\alias{describe_distribution.data.frame}\n\\title{Describe a distribution}\n\\usage{\ndescribe_distribution(x, ...)\n\n\\method{describe_distribution}{numeric}(\n  x,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  ci = NULL,\n  iterations = 100,\n  threshold = 0.1,\n  verbose = TRUE,\n  ...\n)\n\n\\method{describe_distribution}{factor}(x, dispersion = TRUE, range = TRUE, verbose = TRUE, ...)\n\n\\method{describe_distribution}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  centrality = \"mean\",\n  dispersion = TRUE,\n  iqr = TRUE,\n  range = TRUE,\n  quartiles = FALSE,\n  include_factors = FALSE,\n  ci = NULL,\n  iterations = 100,\n  threshold = 0.1,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  by = NULL,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A numeric vector, a character vector, a data frame, or a list. See\n\\code{Details}.}\n\n\\item{...}{Additional arguments to be passed to or from methods.}\n\n\\item{centrality}{The point-estimates (centrality indices) to compute. 
Character\n(vector) or list with one or more of these options: \\code{\"median\"}, \\code{\"mean\"}, \\code{\"MAP\"}\n(see \\code{\\link[bayestestR:map_estimate]{map_estimate()}}), \\code{\"trimmed\"} (which is just \\code{mean(x, trim = threshold)}),\n\\code{\"mode\"} or \\code{\"all\"}.}\n\n\\item{dispersion}{Logical, if \\code{TRUE}, computes indices of dispersion related\nto the estimate(s) (\\code{SD} and \\code{MAD} for \\code{mean} and \\code{median}, respectively).\nDispersion is not available for \\code{\"MAP\"} or \\code{\"mode\"} centrality indices.}\n\n\\item{iqr}{Logical, if \\code{TRUE}, the interquartile range is calculated (based\non \\code{\\link[stats:IQR]{stats::IQR()}}, using \\code{type = 6}).}\n\n\\item{range}{Return the range (min and max).}\n\n\\item{quartiles}{Return the first and third quartiles (25th and 75th\npercentiles).}\n\n\\item{ci}{Confidence Interval (CI) level. Default is \\code{NULL}, i.e. no\nconfidence intervals are computed. If not \\code{NULL}, confidence intervals are\nbased on bootstrap replicates (see \\code{iterations}).}\n\n\\item{iterations}{The number of bootstrap replicates for computing confidence\nintervals. Only applies when \\code{ci} is not \\code{NULL}. Defaults to \\code{100}. For\nmore stable results, increase the number of \\code{iterations}, but note that this\ncan also increase the computation time significantly.}\n\n\\item{threshold}{For \\code{centrality = \"trimmed\"} (i.e. trimmed mean), indicates\nthe fraction (0 to 0.5) of observations to be trimmed from each end of the\nvector before the mean is computed.}\n\n\\item{verbose}{Show or silence warnings and messages.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. 
Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. 
\\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{include_factors}{Logical, if \\code{TRUE}, factors are included in the\noutput, however, only columns for range (first and last factor levels) as\nwell as n and missing will contain information.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{by}{Column names indicating how to split the data in various groups\nbefore describing the distribution. 
\\code{by} groups will be added to potentially\nexisting groups created by \\code{data_group()}.}\n}\n\\value{\nA data frame with columns that describe the properties of the variables.\n}\n\\description{\nThis function describes a distribution by a set of indices (e.g., measures of\ncentrality, dispersion, range, skewness, (excess) kurtosis).\n}\n\\details{\nIf \\code{x} is a data frame, only numeric variables are kept and will be\ndisplayed in the summary by default.\n\nIf \\code{x} is a list, the behavior is different whether \\code{x} is a stored list. If\n\\code{x} is stored (for example, \\code{describe_distribution(mylist)} where \\code{mylist}\nwas created before), artificial variable names are used in the summary\n(\\code{Var_1}, \\code{Var_2}, etc.). If \\code{x} is an unstored list (for example,\n\\code{describe_distribution(list(mtcars$mpg))}), then \\code{\"mtcars$mpg\"} is used as\nvariable name.\n}\n\\note{\nThere is also a\n\\href{https://easystats.github.io/see/articles/parameters.html}{\\code{plot()}-method}\nimplemented in the \\href{https://easystats.github.io/see/}{\\strong{see}-package}.\n}\n\\examples{\n\\dontshow{if (require(\"bayestestR\", quietly = TRUE)) withAutoprint(\\{ # examplesIf}\ndescribe_distribution(rnorm(100))\n\ndata(iris)\ndescribe_distribution(iris)\ndescribe_distribution(iris, include_factors = TRUE, quartiles = TRUE)\ndescribe_distribution(list(mtcars$mpg, mtcars$cyl))\n\\dontshow{\\}) # examplesIf}\n}\n\\seealso{\n\\code{\\link[=kurtosis]{kurtosis()}} to compute kurtosis (recognized as excess kurtosis).\n}\n"
  },
  {
    "path": "man/distribution_mode.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/descriptives.R\n\\name{distribution_mode}\n\\alias{distribution_mode}\n\\title{Compute mode for a statistical distribution}\n\\usage{\ndistribution_mode(x)\n}\n\\arguments{\n\\item{x}{An atomic vector, a list, or a data frame.}\n}\n\\value{\nThe value that appears most frequently in the provided data.\nThe returned data structure will be the same as the entered one.\n}\n\\description{\nCompute mode for a statistical distribution\n}\n\\examples{\n\ndistribution_mode(c(1, 2, 3, 3, 4, 5))\ndistribution_mode(c(1.5, 2.3, 3.7, 3.7, 4.0, 5))\n\n}\n\\seealso{\nFor continuous variables, the\n\\strong{Highest Maximum a Posteriori probability estimate (MAP)} may be\na more useful way to estimate the most commonly-observed value\nthan the mode. See \\code{\\link[bayestestR:map_estimate]{bayestestR::map_estimate()}}.\n}\n"
  },
  {
    "path": "man/efc.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{efc}\n\\alias{efc}\n\\title{Sample dataset from the EFC Survey}\n\\description{\nSelected variables from the EUROFAMCARE survey. Useful when\ntesting on \"real-life\" data sets, including random missing values. This\ndata set also has value and variable label attributes.\n}\n\\keyword{data}\n"
  },
  {
    "path": "man/extract_column_names.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_select.R, R/extract_column_names.R\n\\name{data_select}\n\\alias{data_select}\n\\alias{extract_column_names}\n\\alias{find_columns}\n\\title{Find or get columns in a data frame based on search patterns}\n\\usage{\ndata_select(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\nextract_column_names(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\nfind_columns(\n  data,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. 
If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{...}{Arguments passed down to other functions. Mostly not used yet.}\n}\n\\value{\n\\code{extract_column_names()} returns a character vector with column names that\nmatched the pattern in \\code{select} and \\code{exclude}, or \\code{NULL} if no matching\ncolumn name was found. \\code{data_select()} returns a data frame with matching\ncolumns.\n}\n\\description{\n\\code{extract_column_names()} returns column names from a data set that\nmatch a certain search pattern, while \\code{data_select()} returns the found data.\n}\n\\details{\nSpecifically for \\code{data_select()}, \\code{select} can also be a named character\nvector. In this case, the names are used to rename the columns in the\noutput data frame. 
See 'Examples'.\n\nNote that it is possible to either pass an entire select helper or only the\npattern inside a select helper as a function argument:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{foo <- function(data, pattern) \\{\n  extract_column_names(data, select = starts_with(pattern))\n\\}\nfoo(iris, pattern = \"Sep\")\n\nfoo2 <- function(data, pattern) \\{\n  extract_column_names(data, select = pattern)\n\\}\nfoo2(iris, pattern = starts_with(\"Sep\"))\n}\\if{html}{\\out{</div>}}\n\nThis means that it is also possible to use loop values as arguments or patterns:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{for (i in c(\"Sepal\", \"Sp\")) \\{\n  head(iris) |>\n    extract_column_names(select = starts_with(i)) |>\n    print()\n\\}\n}\\if{html}{\\out{</div>}}\n\nHowever, this behavior is limited to a \"single-level function\". It will not\nwork in nested functions, like below:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{inner <- function(data, arg) \\{\n  extract_column_names(data, select = arg)\n\\}\nouter <- function(data, arg) \\{\n  inner(data, starts_with(arg))\n\\}\nouter(iris, \"Sep\")\n}\\if{html}{\\out{</div>}}\n\nIn this case, it is better to pass the whole select helper as the argument of\n\\code{outer()}:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{outer <- function(data, arg) \\{\n  inner(data, arg)\n\\}\nouter(iris, starts_with(\"Sep\"))\n}\\if{html}{\\out{</div>}}\n}\n\\examples{\n# Find column names by pattern\nextract_column_names(iris, starts_with(\"Sepal\"))\nextract_column_names(iris, ends_with(\"Width\"))\nextract_column_names(iris, regex(\"\\\\\\\\.\"))\nextract_column_names(iris, c(\"Petal.Width\", \"Sepal.Length\"))\n\n# starts with \"Sepal\", but not allowed to end with \"width\"\nextract_column_names(iris, starts_with(\"Sepal\"), exclude = contains(\"Width\"))\n\n# find numeric with mean > 3.5\nnumeric_mean_35 <- function(x) is.numeric(x) && mean(x, na.rm 
= TRUE) > 3.5\nextract_column_names(iris, numeric_mean_35)\n\n# find column names, using range\nextract_column_names(mtcars, c(cyl:hp, wt))\n\n# find range of column names by range, using character vector\nextract_column_names(mtcars, c(\"cyl:hp\", \"wt\"))\n\n# rename returned columns for \"data_select()\"\nhead(data_select(mtcars, c(`Miles per Gallon` = \"mpg\", Cylinders = \"cyl\")))\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/labels_to_levels.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/labels_to_levels.R\n\\name{labels_to_levels}\n\\alias{labels_to_levels}\n\\alias{labels_to_levels.factor}\n\\alias{labels_to_levels.data.frame}\n\\title{Convert value labels into factor levels}\n\\usage{\nlabels_to_levels(x, ...)\n\n\\method{labels_to_levels}{factor}(x, verbose = TRUE, ...)\n\n\\method{labels_to_levels}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  append = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame or factor. Other variable types (e.g. numerics) are not\nallowed.}\n\n\\item{...}{Currently not used.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. 
The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\n\\code{x}, where for all factors former levels are replaced by their value\nlabels.\n}\n\\description{\nConvert value labels into factor levels\n}\n\\details{\n\\code{labels_to_levels()} allows to use value labels of factors as their levels.\n}\n\\examples{\ndata(efc)\n# create factor\nx <- as.factor(efc$c172code)\n# add value labels - these are not factor levels yet\nx <- assign_labels(x, values = c(`1` = \"low\", `2` = \"mid\", `3` = \"high\"))\nlevels(x)\ndata_tabulate(x)\n\nx <- labels_to_levels(x)\nlevels(x)\ndata_tabulate(x)\n}\n"
  },
  {
    "path": "man/makepredictcall.dw_transformer.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/makepredictcall.R\n\\name{makepredictcall.dw_transformer}\n\\alias{makepredictcall.dw_transformer}\n\\title{Utility Function for Safe Prediction with \\code{datawizard} transformers}\n\\usage{\n\\method{makepredictcall}{dw_transformer}(var, call)\n}\n\\arguments{\n\\item{var}{A variable.}\n\n\\item{call}{The term in the formula, as a call.}\n}\n\\value{\nA replacement for \\code{call} for the \\code{predvars} attribute of\n  the terms.\n}\n\\description{\nThis function allows for the use of (some of) \\code{datawizard}'s transformers\ninside a model formula. See examples below.\n\\cr\\cr\nCurrently, \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}}, \\code{\\link[=normalize]{normalize()}}, & \\code{\\link[=rescale]{rescale()}} are\nsupported.\n}\n\\examples{\n\ndata(\"mtcars\")\ntrain <- mtcars[1:30, ]\ntest <- mtcars[31:32, ]\n\nm1 <- lm(mpg ~ center(hp), data = train)\npredict(m1, newdata = test) # Data is \"centered\" before the prediction is made,\n# according to the center of the old data\n\nm2 <- lm(mpg ~ standardize(hp), data = train)\nm3 <- lm(mpg ~ scale(hp), data = train) # same as above\npredict(m2, newdata = test) # Data is \"standardized\" before the prediction is made.\npredict(m3, newdata = test) # Data is \"standardized\" before the prediction is made.\n\n\nm4 <- lm(mpg ~ normalize(hp), data = mtcars)\nm5 <- lm(mpg ~ rescale(hp, to = c(-3, 3)), data = mtcars)\n\n(newdata <- data.frame(hp = c(range(mtcars$hp), 400))) # 400 is outside original range!\n\nmodel.frame(delete.response(terms(m4)), data = newdata)\nmodel.frame(delete.response(terms(m5)), data = newdata)\n\n}\n\\seealso{\n\\code{\\link[stats:makepredictcall]{stats::makepredictcall()}}\n}\n\\concept{datawizard-transformers}\n"
  },
  {
    "path": "man/mean_sd.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mean_sd.R\n\\name{mean_sd}\n\\alias{mean_sd}\n\\alias{median_mad}\n\\title{Summary Helpers}\n\\usage{\nmean_sd(x, times = 1L, remove_na = TRUE, named = TRUE, ...)\n\nmedian_mad(\n  x,\n  times = 1L,\n  remove_na = TRUE,\n  constant = 1.4826,\n  named = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A numeric vector (or one that can be coerced to one via\n\\code{as.numeric()}) to be summarized.}\n\n\\item{times}{How many SDs above and below the Mean (or MADs around the Median)}\n\n\\item{remove_na}{Logical. Should \\code{NA} values be removed before computing (\\code{TRUE},\nthe default) or not (\\code{FALSE})?}\n\n\\item{named}{Should the vector be named?\n(E.g., \\code{c(\"-SD\" = -1, Mean = 1, \"+SD\" = 2)}.)}\n\n\\item{...}{Not used.}\n\n\\item{constant}{scale factor.}\n}\n\\value{\nA (possibly named) numeric vector of length \\code{2*times + 1} of SDs\nbelow the mean, the mean, and SDs above the mean (or median and MAD).\n}\n\\description{\nSummary Helpers\n}\n\\examples{\nmean_sd(mtcars$mpg)\n\nmean_sd(mtcars$mpg, times = 2L)\n\nmedian_mad(mtcars$mpg)\n\n}\n"
  },
  {
    "path": "man/means_by_group.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/means_by_group.R\n\\name{means_by_group}\n\\alias{means_by_group}\n\\alias{means_by_group.numeric}\n\\alias{means_by_group.data.frame}\n\\title{Summary of mean values by group}\n\\usage{\nmeans_by_group(x, ...)\n\n\\method{means_by_group}{numeric}(x, by = NULL, ci = 0.95, weights = NULL, digits = NULL, ...)\n\n\\method{means_by_group}{data.frame}(\n  x,\n  select = NULL,\n  by = NULL,\n  ci = 0.95,\n  weights = NULL,\n  digits = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A vector or a data frame.}\n\n\\item{...}{Currently not used}\n\n\\item{by}{If \\code{x} is a numeric vector, \\code{by} should be a factor that\nindicates the group-classifying categories. If \\code{x} is a data frame, \\code{by}\nshould be a character string, naming the variable in \\code{x} that is used for\ngrouping. Numeric vectors are coerced to factors. Note that \\code{by} should\nonly refer to a single variable.}\n\n\\item{ci}{Level of confidence interval for mean estimates. Default is \\code{0.95}.\nUse \\code{ci = NA} to suppress confidence intervals.}\n\n\\item{weights}{If \\code{x} is a numeric vector, \\code{weights} should be a vector of\nweights that will be applied to weight all observations. If \\code{x} is a data\nframe, \\code{weights} can also be a character string indicating the name of the\nvariable in \\code{x} that should be used for weighting. Default is \\code{NULL}, so no\nweights are used.}\n\n\\item{digits}{Optional scalar, indicating the amount of digits after decimal\npoint when rounding estimates and values.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. 
Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. 
\\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA data frame with information on mean and further summary statistics\nfor each sub-group.\n}\n\\description{\nComputes summary table of means by groups.\n}\n\\details{\nThis function is comparable to \\code{aggregate(x, by, mean)}, but provides\nsome further information, including summary statistics from a One-Way-ANOVA\nusing \\code{x} as dependent and \\code{by} as independent variable. \\code{\\link[emmeans:contrast]{emmeans::contrast()}}\nis used to get p-values for each sub-group. P-values indicate whether each\ngroup-mean is significantly different from the total mean.\n}\n\\examples{\ndata(efc)\nmeans_by_group(efc, \"c12hour\", \"e42dep\")\n\ndata(iris)\nmeans_by_group(iris, \"Sepal.Width\", \"Species\")\n\n# weighting\nefc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\nmeans_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\")\n}\n"
  },
  {
    "path": "man/nhanes_sample.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{nhanes_sample}\n\\alias{nhanes_sample}\n\\title{Sample dataset from the National Health and Nutrition Examination Survey}\n\\description{\nSelected variables from the National Health and Nutrition Examination\nSurvey that are used in the example from Lumley (2010), Appendix E.\n}\n\\references{\nLumley T (2010). Complex Surveys: a guide to analysis using R. Wiley\n}\n\\keyword{data}\n"
  },
  {
    "path": "man/normalize.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/normalize.R, R/unnormalize.R\n\\name{normalize}\n\\alias{normalize}\n\\alias{normalize.numeric}\n\\alias{normalize.data.frame}\n\\alias{unnormalize}\n\\alias{unnormalize.numeric}\n\\alias{unnormalize.data.frame}\n\\alias{unnormalize.grouped_df}\n\\title{Normalize numeric variable to 0-1 range}\n\\usage{\nnormalize(x, ...)\n\n\\method{normalize}{numeric}(x, include_bounds = TRUE, verbose = TRUE, ...)\n\n\\method{normalize}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  include_bounds = TRUE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\nunnormalize(x, ...)\n\n\\method{unnormalize}{numeric}(x, verbose = TRUE, ...)\n\n\\method{unnormalize}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\n\\method{unnormalize}{grouped_df}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A numeric vector, (grouped) data frame, or matrix. See 'Details'.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{include_bounds}{Numeric or logical. Using this can be useful in case of\nbeta-regression, where the response variable is not allowed to include\nzeros and ones. If \\code{TRUE}, the input is normalized to a range that includes\nzero and one. If \\code{FALSE}, the return value is compressed, using\nSmithson and Verkuilen's (2006) formula \\code{(x * (n - 1) + 0.5) / n}, to avoid\nzeros and ones in the normalized variables. 
Else, if numeric (e.g., \\code{0.001}),\n\\code{include_bounds} defines the \"distance\" to the lower and upper bound, i.e.\nthe normalized vectors are rescaled to a range from \\code{0 + include_bounds} to\n\\code{1 - include_bounds}.}\n\n\\item{verbose}{Toggle warnings and messages on or off.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, standardized variables get new\ncolumn names (with the suffix \\code{\"_z\"}) and are appended (column bind) to \\code{x},\nthus returning both the original and the standardized variables. 
If \\code{FALSE},\noriginal variables in \\code{x} will be overwritten by their standardized versions.\nIf a character value, standardized variables are appended with new column\nnames (using the defined suffix) to the original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nA normalized object.\n}\n\\description{\nPerforms a normalization of data, i.e., it scales variables in the range\n0 - 1. This is a special case of \\code{\\link[=rescale]{rescale()}}. \\code{unnormalize()} is the\ncounterpart, but only works for variables that have been normalized with\n\\code{normalize()}.\n}\n\\details{\n\\itemize{\n\\item If \\code{x} is a matrix, normalization is performed across all values (not\ncolumn- or row-wise). For column-wise normalization, convert the matrix to a\ndata.frame.\n\\item If \\code{x} is a grouped data frame (\\code{grouped_df}), normalization is performed\nseparately for each group.\n}\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\n\nnormalize(c(0, 1, 5, -5, -2))\nnormalize(c(0, 1, 5, -5, -2), include_bounds = FALSE)\n# use a value defining the bounds\nnormalize(c(0, 1, 5, -5, -2), include_bounds = 0.001)\n\nhead(normalize(trees))\n\n}\n\\references{\nSmithson M, Verkuilen J (2006). A Better Lemon Squeezer? Maximum-Likelihood\nRegression with Beta-Distributed Dependent Variables. Psychological Methods,\n11(1), 54–71.\n}\n\\seealso{\nSee \\code{\\link[=makepredictcall.dw_transformer]{makepredictcall.dw_transformer()}} for use in model formulas.\n\nOther transform utilities: \n\\code{\\link{ranktransform}()},\n\\code{\\link{rescale}()},\n\\code{\\link{reverse}()},\n\\code{\\link{standardize}()}\n}\n\\concept{transform utilities}\n"
  },
  {
    "path": "man/ranktransform.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ranktransform.R\n\\name{ranktransform}\n\\alias{ranktransform}\n\\alias{ranktransform.numeric}\n\\alias{ranktransform.data.frame}\n\\title{(Signed) rank transformation}\n\\usage{\nranktransform(x, ...)\n\n\\method{ranktransform}{numeric}(\n  x,\n  sign = FALSE,\n  method = \"average\",\n  zeros = \"na\",\n  verbose = TRUE,\n  ...\n)\n\n\\method{ranktransform}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  sign = FALSE,\n  method = \"average\",\n  ignore_case = FALSE,\n  regex = FALSE,\n  zeros = \"na\",\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{Object.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{sign}{Logical, if \\code{TRUE}, return signed ranks.}\n\n\\item{method}{Treatment of ties. Can be one of \\code{\"average\"} (default),\n\\code{\"first\"}, \\code{\"last\"}, \\code{\"random\"}, \\code{\"max\"} or \\code{\"min\"}. See \\code{\\link[=rank]{rank()}} for\ndetails.}\n\n\\item{zeros}{How to handle zeros. If \\code{\"na\"} (default), they are marked as\n\\code{NA}. If \\code{\"signrank\"}, they are kept during the ranking and marked as zeros.\nThis is only used when \\code{sign = TRUE}.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. 
See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. 
\\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nA rank-transformed object.\n}\n\\description{\nTransform numeric values with the integers of their rank (i.e., 1st smallest,\n2nd smallest, 3rd smallest, etc.). Setting the \\code{sign} argument to \\code{TRUE} will\ngive you signed ranks, where the ranking is done according to absolute size\nbut where the sign is preserved (i.e., 2, 1, -3, 4).\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nranktransform(c(0, 1, 5, -5, -2))\n\n# By default, zeros are converted to NA\nsuppressWarnings(\n  ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)\n)\nranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = \"signrank\")\n\nhead(ranktransform(trees))\n}\n\\seealso{\nOther transform utilities: \n\\code{\\link{normalize}()},\n\\code{\\link{rescale}()},\n\\code{\\link{reverse}()},\n\\code{\\link{standardize}()}\n}\n\\concept{transform utilities}\n"
  },
  {
    "path": "man/recode_into.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/recode_into.R\n\\name{recode_into}\n\\alias{recode_into}\n\\title{Recode values from one or more variables into a new variable}\n\\usage{\nrecode_into(\n  ...,\n  data = NULL,\n  default = NA,\n  overwrite = TRUE,\n  preserve_na = FALSE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{...}{A sequence of two-sided formulas, where the left hand side (LHS)\nis a logical matching condition that determines which values match this case.\nThe LHS of this formula is also called \"recode pattern\" (e.g., in messages).\nThe right hand side (RHS) indicates the replacement value.}\n\n\\item{data}{Optional, name of a data frame. This can be used to avoid writing\nthe data name multiple times in \\code{...}. See 'Examples'.}\n\n\\item{default}{Indicates the default value that is chosen when no match in\nthe formulas in \\code{...} is found. If not provided, \\code{NA} is used as default\nvalue.}\n\n\\item{overwrite}{Logical, if \\code{TRUE} (default) and more than one recode pattern\napply to the same case, already recoded values will be overwritten by subsequent\nrecode patterns. If \\code{FALSE}, former recoded cases will not be altered by later\nrecode patterns that would apply to those cases again. A warning message is\nprinted to alert such situations and to avoid unintentional recodings.}\n\n\\item{preserve_na}{Logical, if \\code{TRUE} and \\code{default} is not \\code{NA}, missing\nvalues in the original variable will be set back to \\code{NA} in the recoded\nvariable (unless overwritten by other recode patterns). If \\code{FALSE}, missing\nvalues in the original variable will be recoded to \\code{default}. Setting\n\\code{preserve_na = TRUE} prevents unintentional overwriting of missing values\nwith \\code{default}, which means that you won't find valid values where the\noriginal data only had missing values. 
See 'Examples'.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA vector with recoded values.\n}\n\\description{\nThis function recodes values from one or more variables into a new variable.\nIt is a convenient function to avoid nested \\code{\\link[=ifelse]{ifelse()}} statements, which\nis similar to \\code{dplyr::case_when()}.\n}\n\\examples{\nx <- 1:30\nrecode_into(\n  x > 15 ~ \"a\",\n  x > 10 & x <= 15 ~ \"b\",\n  default = \"c\"\n)\n\nx <- 1:10\n# default behaviour: second recode pattern \"x > 5\" overwrites\n# some of the formerly recoded cases from pattern \"x >= 3 & x <= 7\"\nrecode_into(\n  x >= 3 & x <= 7 ~ 1,\n  x > 5 ~ 2,\n  default = 0,\n  verbose = FALSE\n)\n\n# setting \"overwrite = FALSE\" will not alter formerly recoded cases\nrecode_into(\n  x >= 3 & x <= 7 ~ 1,\n  x > 5 ~ 2,\n  default = 0,\n  overwrite = FALSE,\n  verbose = FALSE\n)\n\nset.seed(123)\nd <- data.frame(\n  x = sample(1:5, 30, TRUE),\n  y = sample(letters[1:5], 30, TRUE),\n  stringsAsFactors = FALSE\n)\n\n# from different variables into new vector\nrecode_into(\n  d$x \\%in\\% 1:3 & d$y \\%in\\% c(\"a\", \"b\") ~ 1,\n  d$x > 3 ~ 2,\n  default = 0\n)\n\n# no need to write name of data frame each time\nrecode_into(\n  x \\%in\\% 1:3 & y \\%in\\% c(\"a\", \"b\") ~ 1,\n  x > 3 ~ 2,\n  data = d,\n  default = 0\n)\n\n# handling of missing values\nd <- data.frame(\n  x = c(1, NA, 2, NA, 3, 4),\n  y = c(1, 11, 3, NA, 5, 6)\n)\n# first NA in x is overwritten by valid value from y\n# we have no known value for second NA in x and y,\n# thus we get one NA in the result\nrecode_into(\n  x <= 3 ~ 1,\n  y > 5 ~ 2,\n  data = d,\n  default = 0,\n  preserve_na = TRUE\n)\n# first NA in x is overwritten by valid value from y\n# default value is used for second NA\nrecode_into(\n  x <= 3 ~ 1,\n  y > 5 ~ 2,\n  data = d,\n  default = 0,\n  preserve_na = FALSE\n)\n}\n"
  },
  {
    "path": "man/recode_values.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/recode_values.R\n\\name{recode_values}\n\\alias{recode_values}\n\\alias{recode_values.numeric}\n\\alias{recode_values.data.frame}\n\\title{Recode old values of variables into new values}\n\\usage{\nrecode_values(x, ...)\n\n\\method{recode_values}{numeric}(\n  x,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  verbose = TRUE,\n  ...\n)\n\n\\method{recode_values}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  recode = NULL,\n  default = NULL,\n  preserve_na = TRUE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame, numeric or character vector, or factor.}\n\n\\item{...}{not used.}\n\n\\item{recode}{A list of named vectors, which indicate the recode pairs.\nThe \\emph{names} of the list-elements (i.e. the left-hand side) represent the\n\\emph{new} values, while the values of the list-elements indicate the original\n(old) values that should be replaced. When recoding numeric vectors,\nelement names have to be surrounded in backticks. For example,\n\\code{recode=list(`0`=1)} would recode all \\code{1} into \\code{0} in a numeric\nvector. See also 'Examples' and 'Details'.}\n\n\\item{default}{Defines the default value for all values that have no match in\nthe recode-pairs. If \\code{NULL}, original values will be preserved when there\nis no match. Note that, if \\code{preserve_na=FALSE}, missing values (\\code{NA}) are\nalso captured by the \\code{default} argument, and thus will also be recoded into\nthe specified value. See 'Examples' and 'Details'.}\n\n\\item{preserve_na}{Logical, if \\code{TRUE}, \\code{NA} (missing values) are preserved.\nThis overrides any other arguments, including \\code{default}. 
Hence, if\n\\code{preserve_na=TRUE}, \\code{default} will no longer convert \\code{NA} into the specified\ndefault value.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. 
If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\n\\code{x}, where old values are replaced by new values.\n}\n\\description{\nThis function recodes old values into new values and can be used to\nrecode numeric or character vectors, or factors.\n}\n\\details{\nThis section describes the pattern of the \\code{recode} arguments, which also\nprovides some shortcuts, in particular when recoding numeric values.\n\\itemize{\n\\item Single values\n\nSingle values either need to be wrapped in backticks (in case of numeric\nvalues) or \"as is\" (for character or factor levels). Example:\n\\code{recode=list(`0`=1,`1`=2)} would recode 1 into 0, and 2 into 1.\nFor factors or character vectors, an example is:\n\\code{recode=list(x=\"a\",y=\"b\")} (recode \"a\" into \"x\" and \"b\" into \"y\").\n\\item Multiple values\n\nMultiple values that should be recoded into a new value can be separated\nwith comma. Example: \\code{recode=list(`1`=c(1,4),`2`=c(2,3))} would recode the\nvalues 1 and 4 into 1, and 2 and 3 into 2. 
It is also possible to define  the\nold values as a character string, like:  \\code{recode=list(`1`=\"1,4\",`2`=\"2,3\")}\nFor factors or character vectors, an example is:\n\\code{recode=list(x=c(\"a\",\"b\"),y=c(\"c\",\"d\"))}.\n\\item Value range\n\nNumeric value ranges can be defined using the \\code{:}. Example:\n\\code{recode=list(`1`=1:3,`2`=4:6)} would recode all values from 1 to 3 into\n1, and 4 to 6 into 2.\n\\item \\code{min} and \\code{max}\n\nplaceholder to use the minimum or maximum value of the\n(numeric) variable. Useful, e.g., when recoding ranges of values.\nExample: \\code{recode=list(`1`=\"min:10\",`2`=\"11:max\")}.\n\\item \\code{default} values\n\nThe \\code{default} argument defines the default value for all values that have\nno match in the recode-pairs. For example,\n\\verb{recode=list(`1`=c(1,2),`2`=c(3,4)), default=9} would\nrecode values 1 and 2 into 1, 3 and 4 into 2, and all other values into 9.\nIf \\code{preserve_na} is set to \\code{FALSE}, \\code{NA} (missing values) will also be\nrecoded into the specified default value.\n\\item Reversing and rescaling\n\nSee \\code{\\link[=reverse]{reverse()}} and \\code{\\link[=rescale]{rescale()}}.\n}\n}\n\\note{\nYou can use \\code{options(data_recode_pattern = \"old=new\")} to switch the\nbehaviour of the \\code{recode}-argument, i.e. recode-pairs are now following the\npattern \\verb{old values = new values}, e.g. if \\code{getOption(\"data_recode_pattern\")}\nis set to \\code{\"old=new\"}, then \\code{recode(`1`=0)} would recode all 1 into 0.\nThe default for \\code{recode(`1`=0)} is to recode all 0 into 1.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\n# numeric ----------\nset.seed(123)\nx <- sample(c(1:4, NA), 15, TRUE)\ntable(x, useNA = \"always\")\n\nout <- recode_values(x, list(`0` = 1, `1` = 2:3, `2` = 4))\nout\ntable(out, useNA = \"always\")\n\n# to recode NA values, set preserve_na to FALSE\nout <- recode_values(\n  x,\n  list(`0` = 1, `1` = 2:3, `2` = 4, `9` = NA),\n  preserve_na = FALSE\n)\nout\ntable(out, useNA = \"always\")\n\n# preserve na ----------\nout <- recode_values(x, list(`0` = 1, `1` = 2:3), default = 77)\nout\ntable(out, useNA = \"always\")\n\n# recode na into default ----------\nout <- recode_values(\n  x,\n  list(`0` = 1, `1` = 2:3),\n  default = 77,\n  preserve_na = FALSE\n)\nout\ntable(out, useNA = \"always\")\n\n\n# factors (character vectors are similar) ----------\nset.seed(123)\nx <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\ntable(x)\n\nout <- recode_values(x, list(x = \"a\", y = c(\"b\", \"c\")))\nout\ntable(out)\n\nout <- recode_values(x, list(x = \"a\", y = \"b\", z = \"c\"))\nout\ntable(out)\n\nout <- recode_values(x, list(y = \"b,c\"), default = 77)\n# same as\n# recode_values(x, list(y = c(\"b\", \"c\")), default = 77)\nout\ntable(out)\n\n\n# data frames ----------\nset.seed(123)\nd <- data.frame(\n  x = sample(c(1:4, NA), 12, TRUE),\n  y = as.factor(sample(c(\"a\", \"b\", \"c\"), 12, TRUE)),\n  stringsAsFactors = FALSE\n)\n\nrecode_values(\n  d,\n  recode = list(`0` = 1, `1` = 2:3, `2` = 4, x = \"a\", y = c(\"b\", \"c\")),\n  append = TRUE\n)\n\n\n# switch recode pattern to \"old=new\" ----------\noptions(data_recode_pattern = \"old=new\")\n\n# numeric\nset.seed(123)\nx <- sample(c(1:4, NA), 15, TRUE)\ntable(x, useNA = 
\"always\")\n\nout <- recode_values(x, list(`1` = 0, `2:3` = 1, `4` = 2))\ntable(out, useNA = \"always\")\n\n# factors (character vectors are similar)\nset.seed(123)\nx <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\ntable(x)\n\nout <- recode_values(x, list(a = \"x\", `b, c` = \"y\"))\ntable(out)\n\n# reset options\noptions(data_recode_pattern = NULL)\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/reexports.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_tabulate.R\n\\docType{import}\n\\name{reexports}\n\\alias{reexports}\n\\alias{print_html}\n\\alias{print_md}\n\\alias{display}\n\\title{Objects exported from other packages}\n\\keyword{internal}\n\\description{\nThese objects are imported from other packages. Follow the links\nbelow to see their documentation.\n\n\\describe{\n  \\item{insight}{\\code{\\link[insight]{display}}, \\code{\\link[insight:display]{print_html}}, \\code{\\link[insight:display]{print_md}}}\n}}\n\n"
  },
  {
    "path": "man/remove_empty.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/remove_empty.R\n\\name{remove_empty}\n\\alias{remove_empty}\n\\alias{empty_columns}\n\\alias{empty_rows}\n\\alias{remove_empty_columns}\n\\alias{remove_empty_rows}\n\\title{Return or remove variables or observations that are completely missing}\n\\usage{\nempty_columns(x)\n\nempty_rows(x)\n\nremove_empty_columns(x)\n\nremove_empty_rows(x)\n\nremove_empty(x)\n}\n\\arguments{\n\\item{x}{A data frame.}\n}\n\\value{\n\\itemize{\n\\item For \\code{empty_columns()} and \\code{empty_rows()}, a numeric (named) vector with row\nor column indices of those variables that completely have missing values.\n\\item For \\code{remove_empty_columns()} and \\code{remove_empty_rows()}, a data frame with\n\"empty\" columns or rows removed, respectively.\n\\item For \\code{remove_empty()}, \\strong{both} empty rows and columns will be removed.\n}\n}\n\\description{\nThese functions check which rows or columns of a data frame completely\ncontain missing values, i.e. which observations or variables completely have\nmissing values, and either (1) returns their indices; or (2) removes them\nfrom the data frame.\n}\n\\details{\nFor character vectors, empty string values (i.e. \\code{\"\"}) are also\nconsidered as missing value. Thus, if a character vector only contains \\code{NA}\nand \\code{\"\"}, it is considered as empty variable and will be removed. 
Same\napplies to observations (rows) that only contain \\code{NA} or \\code{\"\"}.\n}\n\\examples{\ntmp <- data.frame(\n  a = c(1, 2, 3, NA, 5),\n  b = c(1, NA, 3, NA, 5),\n  c = c(NA, NA, NA, NA, NA),\n  d = c(1, NA, 3, NA, 5)\n)\n\ntmp\n\n# indices of empty columns or rows\nempty_columns(tmp)\nempty_rows(tmp)\n\n# remove empty columns or rows\nremove_empty_columns(tmp)\nremove_empty_rows(tmp)\n\n# remove empty columns and rows\nremove_empty(tmp)\n\n# also remove \"empty\" character vectors\ntmp <- data.frame(\n  a = c(1, 2, 3, NA, 5),\n  b = c(1, NA, 3, NA, 5),\n  c = c(\"\", \"\", \"\", \"\", \"\"),\n  stringsAsFactors = FALSE\n)\nempty_columns(tmp)\n\n}\n"
  },
  {
    "path": "man/replace_nan_inf.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/replace_nan_inf.R\n\\name{replace_nan_inf}\n\\alias{replace_nan_inf}\n\\title{Convert infinite or \\code{NaN} values into \\code{NA}}\n\\usage{\nreplace_nan_inf(x, ...)\n}\n\\arguments{\n\\item{x}{A vector or a dataframe}\n\n\\item{...}{Currently not used.}\n}\n\\value{\nData with \\code{Inf}, \\code{-Inf}, and \\code{NaN} converted to \\code{NA}.\n}\n\\description{\nReplaces all infinite (\\code{Inf} and \\code{-Inf}) or \\code{NaN} values with \\code{NA}.\n}\n\\examples{\n# a vector\nx <- c(1, 2, NA, 3, NaN, 4, NA, 5, Inf, -Inf, 6, 7)\nreplace_nan_inf(x)\n\n# a data frame\ndf <- data.frame(\n  x = c(1, NA, 5, Inf, 2, NA),\n  y = c(3, NaN, 4, -Inf, 6, 7),\n  stringsAsFactors = FALSE\n)\nreplace_nan_inf(df)\n}\n"
  },
  {
    "path": "man/rescale.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_rescale.R\n\\name{rescale}\n\\alias{rescale}\n\\alias{change_scale}\n\\alias{rescale.numeric}\n\\alias{rescale.data.frame}\n\\title{Rescale Variables to a New Range}\n\\usage{\nrescale(x, ...)\n\nchange_scale(x, ...)\n\n\\method{rescale}{numeric}(\n  x,\n  to = c(0, 100),\n  multiply = NULL,\n  add = NULL,\n  range = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{rescale}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  to = c(0, 100),\n  multiply = NULL,\n  add = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, numeric vector or factor.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{to}{Numeric vector of length 2 giving the new range that the variable\nwill have after rescaling. To reverse-score a variable, the range should\nbe given with the maximum value first. See examples.}\n\n\\item{multiply}{If not \\code{NULL}, \\code{to} is ignored and \\code{multiply} will be used,\ngiving the factor by which the actual range of \\code{x} should be expanded.\nFor example, if a vector ranges from 5 to 15 and \\code{multiply = 1.1}, the current\nrange of 10 will be expanded by the factor of 1.1, giving a new range of\n11. Thus, the rescaled vector would range from 4.5 to 15.5.}\n\n\\item{add}{A vector of length 1 or 2. If not \\code{NULL}, \\code{to} is ignored and \\code{add}\nwill be used, giving the amount by which the minimum and maximum of the\nactual range of \\code{x} should be expanded. For example, if a vector ranges from\n5 to 15 and \\code{add = 1}, the range will be expanded from 4 to 16. If \\code{add} is\nof length 2, then the first value is used for the lower bound and the second\nvalue for the upper bound.}\n\n\\item{range}{Initial (old) range of values. 
If \\code{NULL}, will take the range of\nthe input vector (\\code{range(x)}).}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. 
return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. 
When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nA rescaled object.\n}\n\\description{\nRescale variables to a new range. Can also be used to reverse-score variables\n(change the keying/scoring direction), or to expand a range.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nrescale(c(0, 1, 5, -5, -2))\nrescale(c(0, 1, 5, -5, -2), to = c(-5, 5))\nrescale(c(1, 2, 3, 4, 5), to = c(-2, 2))\n\n# Specify the \"theoretical\" range of the input vector\nrescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4))\n\n# Reverse-score a variable\nrescale(c(1, 2, 3, 4, 5), to = c(5, 1))\nrescale(c(1, 2, 3, 4, 5), to = c(2, -2))\n\n# Data frames\nhead(rescale(iris, to = c(0, 1)))\nhead(rescale(iris, to = c(0, 1), select = \"Sepal.Length\"))\n\n# One can specify a list of ranges\nhead(rescale(iris, to = list(\n  \"Sepal.Length\" = c(0, 1),\n  \"Petal.Length\" = c(-1, 0)\n)))\n\n# \"expand\" ranges by a factor or a given value\nx <- 5:15\nx\n# both will expand the range by 10\\%\nrescale(x, 
multiply = 1.1)\nrescale(x, add = 0.5)\n\n# expand range by different values\nrescale(x, add = c(1, 3))\n\n# Specify list of multipliers\nd <- data.frame(x = 5:15, y = 5:15)\nrescale(d, multiply = list(x = 1.1, y = 0.5))\n}\n\\seealso{\nSee \\code{\\link[=makepredictcall.dw_transformer]{makepredictcall.dw_transformer()}} for use in model formulas.\n\nOther transform utilities: \n\\code{\\link{normalize}()},\n\\code{\\link{ranktransform}()},\n\\code{\\link{reverse}()},\n\\code{\\link{standardize}()}\n}\n\\concept{transform utilities}\n"
  },
  {
    "path": "man/rescale_weights.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/rescale_weights.R\n\\name{rescale_weights}\n\\alias{rescale_weights}\n\\title{Rescale design weights for multilevel analysis}\n\\usage{\nrescale_weights(\n  data,\n  probability_weights = NULL,\n  by = NULL,\n  nest = FALSE,\n  method = \"carle\"\n)\n}\n\\arguments{\n\\item{data}{A data frame.}\n\n\\item{probability_weights}{Variable indicating the probability (design or\nsampling) weights of the survey data (level-1-weight), provided as character\nstring or formula.}\n\n\\item{by}{Variable names (as character vector, or as formula), indicating\nthe grouping structure (strata) of the survey data (level-2-cluster\nvariable). It is also possible to create weights for multiple group\nvariables; in such cases, each created weighting variable will be suffixed\nby the name of the group variable. This argument is required for\n\\code{method = \"carle\"}, but optional for \\code{method = \"kish\"}.}\n\n\\item{nest}{Logical, if \\code{TRUE} and \\code{by} indicates at least two group\nvariables, then groups are \"nested\", i.e. groups are now a combination from\neach group level of the variables in \\code{by}. This argument is not used when\n\\code{method = \"kish\"}.}\n\n\\item{method}{String, indicating which rescale-method is used for rescaling\nweights. Can be either \\code{\"carle\"} (default) or \\code{\"kish\"}. See 'Details'. If\n\\code{method = \"carle\"}, the \\code{by} argument is required.}\n}\n\\value{\n\\code{data}, including the new weighting variable(s). 
For \\code{method = \"carle\"}, new\ncolumns \\code{rescaled_weights_a} and \\code{rescaled_weights_b} are returned, and for\n\\code{method = \"kish\"}, the returned data contains a column \\code{rescaled_weights}.\nThese represent the rescaled design weights to use in multilevel models (use\nthese variables for the \\code{weights} argument).\n}\n\\description{\nMost functions to fit multilevel and mixed effects models only\nallow the user to specify frequency weights, but not design (i.e., sampling\nor probability) weights, which should be used when analyzing complex samples\n(e.g., probability samples). \\code{rescale_weights()} implements two algorithms,\none proposed by \\cite{Asparouhov (2006)} and \\cite{Carle (2009)}, to rescale\ndesign weights in survey data to account for the grouping structure of\nmultilevel models, and one based on the design effect proposed by\n\\cite{Kish (1965)}, to rescale weights by the design effect to account for\nadditional sampling error introduced by weighting.\n}\n\\details{\n\\itemize{\n\\item \\code{method = \"carle\"}\n\nRescaling is based on two methods: For \\code{rescaled_weights_a}, the sample\nweights \\code{probability_weights} are adjusted by a factor that represents the\nproportion of group size divided by the sum of sampling weights within each\ngroup. The adjustment factor for \\code{rescaled_weights_b} is the sum of sample\nweights within each group divided by the sum of squared sample weights\nwithin each group (see Carle (2009), Appendix B). In other words,\n\\code{rescaled_weights_a} \"scales the weights so that the new weights sum to the\ncluster sample size\" while \\code{rescaled_weights_b} \"scales the weights so that\nthe new weights sum to the effective cluster size\".\n\nRegarding the choice between scaling methods A and B, Carle suggests that\n\"analysts who wish to discuss point estimates should report results based\non weighting method A. 
For analysts more interested in residual\nbetween-group variance, method B may generally provide the least biased\nestimates\". In general, it is recommended to fit a non-weighted model and\nweighted models with both scaling methods and when comparing the models,\nsee whether the \"inferential decisions converge\", to gain confidence in the\nresults.\n\nThough the bias of scaled weights decreases with increasing group size,\nmethod A is preferred when insufficient or low group size is a concern.\n\nThe group ID and probably PSU may be used as random effects (e.g. nested\ndesign, or group and PSU as varying intercepts), depending on the survey\ndesign that should be mimicked.\n\\item \\code{method = \"kish\"}\n\nRescaling is based on scaling the sample weights so the mean value is 1,\nwhich means the sum of all weights equals the sample size. Next, the design\neffect (\\emph{Kish 1965}) is calculated, which is the mean of the squared\nweights divided by the squared mean of the weights. The scaled sample\nweights are then divided by the design effect. This method is most\nappropriate when weights are based on additional variables beyond the\ngrouping variables in the model (e.g., other demographic characteristics),\nbut may also be useful in other contexts.\n\nSome tests on real-world survey-data suggest that, in comparison to the\nCarle-method, the Kish-method comes closer to estimates from a regular\nsurvey-design using the \\strong{survey} package. 
Note that these tests are not\nrepresentative and it is recommended to check your results against a\nstandard survey-design.\n}\n}\n\\examples{\n\\dontshow{if (all(insight::check_if_installed(c(\"lme4\", \"parameters\"), quietly = TRUE))) withAutoprint(\\{ # examplesIf}\ndata(nhanes_sample)\nhead(rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\"))\n\n# also works with multiple group-variables\nhead(rescale_weights(nhanes_sample, \"WTINT2YR\", c(\"SDMVSTRA\", \"SDMVPSU\")))\n\n# or nested structures.\nx <- rescale_weights(\n  data = nhanes_sample,\n  probability_weights = \"WTINT2YR\",\n  by = c(\"SDMVSTRA\", \"SDMVPSU\"),\n  nest = TRUE\n)\nhead(x)\n\n\\donttest{\n# compare different methods, using multilevel-Poisson regression\n\nd <- rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\")\nresult1 <- lme4::glmer(\n  total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n  family = poisson(),\n  data = d,\n  weights = rescaled_weights_a\n)\nresult2 <- lme4::glmer(\n  total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n  family = poisson(),\n  data = d,\n  weights = rescaled_weights_b\n)\n\nd <- rescale_weights(\n  nhanes_sample,\n  \"WTINT2YR\",\n  method = \"kish\"\n)\nresult3 <- lme4::glmer(\n  total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n  family = poisson(),\n  data = d,\n  weights = rescaled_weights\n)\nd <- rescale_weights(\n  nhanes_sample,\n  \"WTINT2YR\",\n  \"SDMVSTRA\",\n  method = \"kish\"\n)\nresult4 <- lme4::glmer(\n  total ~ factor(RIAGENDR) + log(age) + factor(RIDRETH1) + (1 | SDMVPSU),\n  family = poisson(),\n  data = d,\n  weights = rescaled_weights\n)\nparameters::compare_parameters(\n  list(result1, result2, result3, result4),\n  exponentiate = TRUE,\n  column_names = c(\"Carle (A)\", \"Carle (B)\", \"Kish\", \"Kish (grouped)\")\n)\n}\n\\dontshow{\\}) # examplesIf}\n}\n\\references{\n\\itemize{\n\\item Asparouhov T. (2006). 
General Multi-Level Modeling with Sampling\nWeights. Communications in Statistics - Theory and Methods 35: 439-460\n\\item Carle A.C. (2009). Fitting multilevel models in complex survey data\nwith design weights: Recommendations. BMC Medical Research Methodology\n9(49): 1-13\n\\item Kish, L. (1965) Survey Sampling. London: Wiley.\n}\n}\n"
  },
  {
    "path": "man/reshape_ci.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/reshape_ci.R\n\\name{reshape_ci}\n\\alias{reshape_ci}\n\\title{Reshape CI between wide/long formats}\n\\usage{\nreshape_ci(x, ci_type = \"CI\")\n}\n\\arguments{\n\\item{x}{A data frame containing columns named \\code{CI_low} and \\code{CI_high} (or\nsimilar, see \\code{ci_type}).}\n\n\\item{ci_type}{String indicating the \"type\" (i.e. prefix) of the interval\ncolumns. Per \\emph{easystats} convention, confidence or credible intervals are\nnamed \\code{CI_low} and \\code{CI_high}, and the related \\code{ci_type} would be \\code{\"CI\"}.\nIf column names for other intervals differ, \\code{ci_type} can be used to\nindicate the name, e.g. \\code{ci_type = \"SI\"} can be used for support intervals,\nwhere the column names in the data frame would be \\code{SI_low} and \\code{SI_high}.}\n}\n\\value{\nA data frame with columns corresponding to confidence intervals reshaped\neither to wide or long format.\n}\n\\description{\nReshape CI between wide/long formats.\n}\n\\examples{\nx <- data.frame(\n  Parameter = c(\"Term 1\", \"Term 2\", \"Term 1\", \"Term 2\"),\n  CI = c(0.8, 0.8, 0.9, 0.9),\n  CI_low = c(0.2, 0.3, 0.1, 0.15),\n  CI_high = c(0.5, 0.6, 0.8, 0.85),\n  stringsAsFactors = FALSE\n)\n\nreshape_ci(x)\nreshape_ci(reshape_ci(x))\n}\n"
  },
  {
    "path": "man/reverse.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data_reverse.R\n\\name{reverse}\n\\alias{reverse}\n\\alias{reverse_scale}\n\\alias{reverse.numeric}\n\\alias{reverse.data.frame}\n\\title{Reverse-Score Variables}\n\\usage{\nreverse(x, ...)\n\nreverse_scale(x, ...)\n\n\\method{reverse}{numeric}(x, range = NULL, verbose = TRUE, ...)\n\n\\method{reverse}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  range = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, numeric vector or factor.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{range}{Range of values that is used as reference for reversing the\nscale. For numeric variables, can be \\code{NULL} or a numeric vector of length\ntwo, indicating the lowest and highest value of the reference range. If\n\\code{NULL}, will take the range of the input vector (\\code{range(x)}). For factors,\n\\code{range} can be \\code{NULL}, a numeric vector of length two, or a (numeric)\nvector of at least the same length as factor levels (i.e. must be equal\nto or larger than \\code{nlevels(x)}). Note that providing a \\code{range} for factors\nusually only makes sense when factor levels are numeric, not characters.}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. 
In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. 
\\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. 
\\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nA reverse-scored object.\n}\n\\description{\nReverse-score variables (change the keying/scoring direction).\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nreverse(c(1, 2, 3, 4, 5))\nreverse(c(-2, -1, 0, 2, 1))\n\n# Specify the \"theoretical\" range of the input vector\nreverse(c(1, 3, 4), range = c(0, 4))\n\n# Factor variables\nreverse(factor(c(1, 2, 3, 4, 5)))\nreverse(factor(c(1, 2, 3, 4, 5)), range = 0:10)\n\n# Data frames\nhead(reverse(iris))\nhead(reverse(iris, select = \"Sepal.Length\"))\n\n}\n\\seealso{\nOther transform utilities: \n\\code{\\link{normalize}()},\n\\code{\\link{ranktransform}()},\n\\code{\\link{rescale}()},\n\\code{\\link{standardize}()}\n}\n\\concept{transform utilities}\n"
  },
  {
    "path": "man/row_count.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/row_count.R\n\\name{row_count}\n\\alias{row_count}\n\\title{Count specific values row-wise}\n\\usage{\nrow_count(\n  data,\n  select = NULL,\n  exclude = NULL,\n  count = NULL,\n  allow_coercion = TRUE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{data}{A data frame with at least two columns, where number of specific\nvalues are counted row-wise.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. 
\\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{count}{The value for which the row sum should be computed. May be a\nnumeric value, a character string (for factors or character vectors), \\code{NA} or\n\\code{Inf}.}\n\n\\item{allow_coercion}{Logical. If \\code{FALSE}, \\code{count} matches only values of same\nclass (i.e. when \\code{count = 2}, the value \\code{\"2\"} is not counted and vice versa).\nBy default, when \\code{allow_coercion = TRUE}, \\code{count = 2} also matches \\code{\"2\"}. In\norder to count factor levels in the data, use \\code{count = factor(\"level\")}. 
See\n'Examples'.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA vector with row-wise counts of values specified in \\code{count}.\n}\n\\description{\n\\code{row_count()} mimics base R's \\code{rowSums()}, with sums for a\nspecific value indicated by \\code{count}. Hence, it is similar to\n\\code{rowSums(x == count, na.rm = TRUE)}, but offers some more options, including\nstrict comparisons. Comparisons using \\code{==} coerce values to atomic vectors,\nthus both \\code{2 == 2} and \\code{\"2\" == 2} are \\code{TRUE}. 
In \\code{row_count()}, it is also\npossible to make \"type safe\" comparisons using the \\code{allow_coercion} argument,\nwhere \\code{\"2\" == 2} is not true.\n}\n\\examples{\ndat <- data.frame(\n  c1 = c(1, 2, NA, 4),\n  c2 = c(NA, 2, NA, 5),\n  c3 = c(NA, 4, NA, NA),\n  c4 = c(2, 3, 7, 8)\n)\n\n# count all 4s per row\nrow_count(dat, count = 4)\n# count all missing values per row\nrow_count(dat, count = NA)\n\ndat <- data.frame(\n  c1 = c(\"1\", \"2\", NA, \"3\"),\n  c2 = c(NA, \"2\", NA, \"3\"),\n  c3 = c(NA, 4, NA, NA),\n  c4 = c(2, 3, 7, Inf)\n)\n# count all 2s and \"2\"s per row\nrow_count(dat, count = 2)\n# only count 2s, but not \"2\"s\nrow_count(dat, count = 2, allow_coercion = FALSE)\n\ndat <- data.frame(\n  c1 = factor(c(\"1\", \"2\", NA, \"3\")),\n  c2 = c(\"2\", \"1\", NA, \"3\"),\n  c3 = c(NA, 4, NA, NA),\n  c4 = c(2, 3, 7, Inf)\n)\n# find only character \"2\"s\nrow_count(dat, count = \"2\", allow_coercion = FALSE)\n# find only factor level \"2\"s\nrow_count(dat, count = factor(\"2\"), allow_coercion = FALSE)\n\n}\n"
  },
  {
    "path": "man/row_means.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/row_means.R\n\\name{row_means}\n\\alias{row_means}\n\\alias{row_sums}\n\\title{Row means or sums (optionally with minimum amount of valid values)}\n\\usage{\nrow_means(\n  data,\n  select = NULL,\n  exclude = NULL,\n  min_valid = NULL,\n  digits = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  remove_na = FALSE,\n  verbose = TRUE\n)\n\nrow_sums(\n  data,\n  select = NULL,\n  exclude = NULL,\n  min_valid = NULL,\n  digits = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  remove_na = FALSE,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{data}{A data frame with at least two columns, where row means or row\nsums are applied.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{min_valid}{Optional, a numeric value of length 1. 
May either be\n\\itemize{\n\\item a numeric value that indicates the amount of valid values per row to\ncalculate the row mean or row sum;\n\\item or a value between \\code{0} and \\code{1}, indicating a proportion of valid values per\nrow to calculate the row mean or row sum (see 'Details').\n\\item \\code{NULL} (default), in which all cases are considered.\n}\n\nIf a row's sum of valid values is less than \\code{min_valid}, \\code{NA} will be returned.}\n\n\\item{digits}{Numeric value indicating the number of decimal places to be\nused for rounding mean values. Negative values are allowed (see 'Details').\nBy default, \\code{digits = NULL} and no rounding is used.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{remove_na}{Logical, if \\code{TRUE} (default), removes missing (\\code{NA}) values\nbefore calculating row means or row sums. 
Only applies if \\code{min_valid} is not\nspecified.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA vector with row means (for \\code{row_means()}) or row sums (for\n\\code{row_sums()}) for those rows with at least \\code{n} valid values.\n}\n\\description{\nThis function is similar to the SPSS \\code{MEAN.n} or \\code{SUM.n}\nfunction and computes row means or row sums from a data frame or matrix if at\nleast \\code{min_valid} values of a row are valid (and not \\code{NA}).\n}\n\\details{\nRounding to a negative number of \\code{digits} means rounding to a power\nof ten, for example \\code{row_means(df, 3, digits = -2)} rounds to the nearest\nhundred. For \\code{min_valid}, if not \\code{NULL}, \\code{min_valid} must be a numeric value\nfrom \\code{0} to \\code{ncol(data)}. If a row in the data frame has at least \\code{min_valid}\nnon-missing values, the row mean or row sum is returned. If \\code{min_valid} is a\nnon-integer value from 0 to 1, \\code{min_valid} is considered to indicate the\nproportion of required non-missing values per row. E.g., if\n\\code{min_valid = 0.75}, a row must have at least \\code{ncol(data) * min_valid}\nnon-missing values for the row mean or row sum to be calculated. 
See\n'Examples'.\n}\n\\examples{\ndat <- data.frame(\n  c1 = c(1, 2, NA, 4),\n  c2 = c(NA, 2, NA, 5),\n  c3 = c(NA, 4, NA, NA),\n  c4 = c(2, 3, 7, 8)\n)\n\n# default, all means are shown, if no NA values are present\nrow_means(dat)\n\n# remove all NA before computing row means\nrow_means(dat, remove_na = TRUE)\n\n# needs at least 4 non-missing values per row\nrow_means(dat, min_valid = 4) # 1 valid return value\nrow_sums(dat, min_valid = 4) # 1 valid return value\n\n# needs at least 3 non-missing values per row\nrow_means(dat, min_valid = 3) # 2 valid return values\n\n# needs at least 2 non-missing values per row\nrow_means(dat, min_valid = 2)\n\n# needs at least 1 non-missing value per row, for two selected variables\nrow_means(dat, select = c(\"c1\", \"c3\"), min_valid = 1)\n\n# needs at least 50\\% of non-missing values per row\nrow_means(dat, min_valid = 0.5) # 3 valid return values\nrow_sums(dat, min_valid = 0.5)\n\n# needs at least 75\\% of non-missing values per row\nrow_means(dat, min_valid = 0.75) # 2 valid return values\n\n}\n"
  },
  {
    "path": "man/rownames.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils-rows.R\n\\name{rownames_as_column}\n\\alias{rownames_as_column}\n\\alias{column_as_rownames}\n\\alias{rowid_as_column}\n\\title{Tools for working with row names or row ids}\n\\usage{\nrownames_as_column(x, var = \"rowname\")\n\ncolumn_as_rownames(x, var = \"rowname\")\n\nrowid_as_column(x, var = \"rowid\")\n}\n\\arguments{\n\\item{x}{A data frame.}\n\n\\item{var}{Name of column to use for row names/ids. For \\code{column_as_rownames()},\nthis argument can be the variable name or the column number. For\n\\code{rownames_as_column()} and \\code{rowid_as_column()}, the column name must not\nalready exist in the data.}\n}\n\\value{\nA data frame.\n}\n\\description{\nTools for working with row names or row ids\n}\n\\details{\nThese are similar to \\code{tibble}'s functions \\code{column_to_rownames()},\n\\code{rownames_to_column()} and \\code{rowid_to_column()}. Note that the behavior of\n\\code{rowid_as_column()} is different for grouped dataframe: instead of making\nthe rowid unique across the full dataframe, it creates rowid per group.\nTherefore, there can be several rows with the same rowid if they belong to\ndifferent groups.\n\nIf you are familiar with \\code{dplyr}, this is similar to doing the following:\n\n\\if{html}{\\out{<div class=\"sourceCode r\">}}\\preformatted{data |>\n  group_by(grp) |>\n  mutate(id = row_number()) |>\n  ungroup()\n}\\if{html}{\\out{</div>}}\n}\n\\examples{\n# Convert between row names and column --------------------------------\ntest <- rownames_as_column(mtcars, var = \"car\")\ntest\nhead(column_as_rownames(test, var = \"car\"))\n\ntest_data <- head(iris)\n\nrowid_as_column(test_data)\nrowid_as_column(test_data, var = \"my_id\")\n}\n"
  },
  {
    "path": "man/skewness.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/skewness_kurtosis.R\n\\name{skewness}\n\\alias{skewness}\n\\alias{skewness.numeric}\n\\alias{kurtosis}\n\\alias{kurtosis.numeric}\n\\alias{print.parameters_kurtosis}\n\\alias{print.parameters_skewness}\n\\alias{summary.parameters_skewness}\n\\alias{summary.parameters_kurtosis}\n\\title{Compute Skewness and (Excess) Kurtosis}\n\\usage{\nskewness(x, ...)\n\n\\method{skewness}{numeric}(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  verbose = TRUE,\n  ...\n)\n\nkurtosis(x, ...)\n\n\\method{kurtosis}{numeric}(\n  x,\n  remove_na = TRUE,\n  type = \"2\",\n  iterations = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{print}{parameters_kurtosis}(x, digits = 3, test = FALSE, ...)\n\n\\method{print}{parameters_skewness}(x, digits = 3, test = FALSE, ...)\n\n\\method{summary}{parameters_skewness}(object, test = FALSE, ...)\n\n\\method{summary}{parameters_kurtosis}(object, test = FALSE, ...)\n}\n\\arguments{\n\\item{x}{A numeric vector or data.frame.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{remove_na}{Logical. Should \\code{NA} values be removed before computing (\\code{TRUE},\ndefault) or not (\\code{FALSE})?}\n\n\\item{type}{Type of algorithm for computing skewness. May be one of \\code{1}\n(or \\code{\"1\"}, \\code{\"I\"} or \\code{\"classic\"}), \\code{2} (or \\code{\"2\"},\n\\code{\"II\"} or \\code{\"SPSS\"} or \\code{\"SAS\"}) or \\code{3} (or  \\code{\"3\"},\n\\code{\"III\"} or \\code{\"Minitab\"}). See 'Details'.}\n\n\\item{iterations}{The number of bootstrap replicates for computing standard\nerrors. 
If \\code{NULL} (default), parametric standard errors are computed.}\n\n\\item{verbose}{Toggle warnings and messages.}\n\n\\item{digits}{Number of decimal places.}\n\n\\item{test}{Logical, if \\code{TRUE}, tests if skewness or kurtosis is\nsignificantly different from zero.}\n\n\\item{object}{An object returned by \\code{skewness()} or \\code{kurtosis()}.}\n}\n\\value{\nValues of skewness or kurtosis.\n}\n\\description{\nCompute Skewness and (Excess) Kurtosis\n}\n\\details{\n\\subsection{Skewness}{\nSymmetric distributions have a \\code{skewness} around zero, while\na negative skewness value indicates a \"left-skewed\" distribution, and a\npositive skewness value indicates a \"right-skewed\" distribution. Examples\nfor the relationship of skewness and distributions are:\n\\itemize{\n\\item Normal distribution (and other symmetric distribution) has a skewness\nof 0\n\\item Half-normal distribution has a skewness just below 1\n\\item Exponential distribution has a skewness of 2\n\\item Lognormal distribution can have a skewness of any positive value,\ndepending on its parameters\n}\n\n(\\cite{https://en.wikipedia.org/wiki/Skewness})\n}\n\n\\subsection{Types of Skewness}{\n\\code{skewness()} supports three different methods for estimating skewness,\nas discussed in \\cite{Joanes and Gill (1998)}:\n\\itemize{\n\\item Type \"1\" is the \"classical\" method, which is \\code{g1 = (sum((x - mean(x))^3) / n) / (sum((x - mean(x))^2) / n)^1.5}\n\\item Type \"2\" first calculates the type-1 skewness, then adjusts the result:\n\\code{G1 = g1 * sqrt(n * (n - 1)) / (n - 2)}. This is what SAS and SPSS\nusually return.\n\\item Type \"3\" first calculates the type-1 skewness, then adjusts the result:\n\\code{b1 = g1 * ((1 - 1 / n))^1.5}. This is what Minitab usually returns.\n}\n}\n\n\\subsection{Kurtosis}{\nThe \\code{kurtosis} is a measure of \"tailedness\" of a distribution. A\ndistribution with a kurtosis value of about zero is called \"mesokurtic\". 
A\nkurtosis value larger than zero indicates a \"leptokurtic\" distribution with\n\\emph{fatter} tails. A kurtosis value below zero indicates a \"platykurtic\"\ndistribution with \\emph{thinner} tails\n(\\cite{https://en.wikipedia.org/wiki/Kurtosis}).\n}\n\n\\subsection{Types of Kurtosis}{\n\\code{kurtosis()} supports three different methods for estimating kurtosis,\nas discussed in \\cite{Joanes and Gill (1988)}:\n\\itemize{\n\\item Type \"1\" is the \"classical\" method, which is \\code{g2 = n * sum((x - mean(x))^4) / (sum((x - mean(x))^2)^2) - 3}.\n\\item Type \"2\" first calculates the type-1 kurtosis, then adjusts the result:\n\\code{G2 = ((n + 1) * g2 + 6) * (n - 1)/((n - 2) * (n - 3))}. This is what\nSAS and SPSS usually return\n\\item Type \"3\" first calculates the type-1 kurtosis, then adjusts the result:\n\\code{b2 = (g2 + 3) * (1 - 1 / n)^2 - 3}. This is what Minitab usually\nreturns.\n}\n\n}\n\n\\subsection{Standard Errors}{\nIt is recommended to compute empirical (bootstrapped) standard errors (via\nthe \\code{iterations} argument) than relying on analytic standard errors\n(\\cite{Wright & Herrington, 2011}).\n}\n}\n\\examples{\nskewness(rnorm(1000))\nkurtosis(rnorm(1000))\n}\n\\references{\n\\itemize{\n\\item D. N. Joanes and C. A. Gill (1998). Comparing measures of sample\nskewness and kurtosis. The Statistician, 47, 183–189.\n\\item Wright, D. B., & Herrington, J. A. (2011). Problematic standard\nerrors and confidence intervals for skewness and kurtosis. Behavior\nresearch methods, 43(1), 8-17.\n}\n}\n"
  },
  {
    "path": "man/slide.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/slide.R\n\\name{slide}\n\\alias{slide}\n\\alias{slide.numeric}\n\\alias{slide.data.frame}\n\\title{Shift numeric value range}\n\\usage{\nslide(x, ...)\n\n\\method{slide}{numeric}(x, lowest = 0, ...)\n\n\\method{slide}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  lowest = 0,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame or numeric vector.}\n\n\\item{...}{not used.}\n\n\\item{lowest}{Numeric, indicating the lowest (minimum) value when converting\nfactors or character vectors to numeric values.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. 
\\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. 
If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\n\\code{x}, where the range of numeric variables starts at a new value.\n}\n\\description{\nThis functions shifts the value range of a numeric variable, so that the\nnew range starts at a given value.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. 
That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\n# numeric\nhead(mtcars$gear)\nhead(slide(mtcars$gear))\nhead(slide(mtcars$gear, lowest = 10))\n\n# data frame\nsapply(slide(mtcars, lowest = 1), min)\nsapply(mtcars, min)\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, \\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "man/smoothness.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/smoothness.R\n\\name{smoothness}\n\\alias{smoothness}\n\\title{Quantify the smoothness of a vector}\n\\usage{\nsmoothness(x, method = \"cor\", lag = 1, iterations = NULL, ...)\n}\n\\arguments{\n\\item{x}{Numeric vector (similar to a time series).}\n\n\\item{method}{Can be \\code{\"diff\"} (the standard deviation of the standardized\ndifferences) or \\code{\"cor\"} (default, lag-one autocorrelation).}\n\n\\item{lag}{An integer indicating which lag to use. If less than \\code{1}, will be\ninterpreted as expressed in percentage of the length of the vector.}\n\n\\item{iterations}{The number of bootstrap replicates for computing standard\nerrors. If \\code{NULL} (default), parametric standard errors are computed.}\n\n\\item{...}{Arguments passed to or from other methods.}\n}\n\\value{\nValue of smoothness.\n}\n\\description{\nQuantify the smoothness of a vector\n}\n\\examples{\nx <- (-10:10)^3 + rnorm(21, 0, 100)\nplot(x)\nsmoothness(x, method = \"cor\")\nsmoothness(x, method = \"diff\")\n}\n\\references{\nhttps://stats.stackexchange.com/questions/24607/how-to-measure-smoothness-of-a-time-series-in-r\n}\n"
  },
  {
    "path": "man/standardize.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/standardize.R, R/unstandardize.R\n\\name{standardize}\n\\alias{standardize}\n\\alias{standardise}\n\\alias{standardize.numeric}\n\\alias{standardize.factor}\n\\alias{standardize.data.frame}\n\\alias{unstandardize}\n\\alias{unstandardise}\n\\alias{unstandardize.numeric}\n\\alias{unstandardize.data.frame}\n\\title{Standardization (Z-scoring)}\n\\usage{\nstandardize(x, ...)\n\nstandardise(x, ...)\n\n\\method{standardize}{numeric}(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  scale = NULL,\n  verbose = TRUE,\n  ...\n)\n\n\\method{standardize}{factor}(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  force = FALSE,\n  verbose = TRUE,\n  ...\n)\n\n\\method{standardize}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = NULL,\n  reference = NULL,\n  center = NULL,\n  scale = NULL,\n  remove_na = c(\"none\", \"selected\", \"all\"),\n  force = FALSE,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n\nunstandardize(x, ...)\n\nunstandardise(x, ...)\n\n\\method{unstandardize}{numeric}(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  ...\n)\n\n\\method{unstandardize}{data.frame}(\n  x,\n  center = NULL,\n  scale = NULL,\n  reference = NULL,\n  robust = FALSE,\n  two_sd = FALSE,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A (grouped) data frame, a vector or a statistical model (for\n\\code{unstandardize()} cannot be a model).}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{robust}{Logical, if \\code{TRUE}, centering is done by subtracting the\nmedian from the variables and dividing it by the median absolute deviation\n(MAD). 
If \\code{FALSE}, variables are standardized by subtracting the\nmean and dividing it by the standard deviation (SD).}\n\n\\item{two_sd}{If \\code{TRUE}, the variables are scaled by two times the deviation\n(SD or MAD depending on \\code{robust}). This method can be useful to obtain\nmodel coefficients of continuous parameters comparable to coefficients\nrelated to binary predictors, when applied to \\strong{the predictors} (not the\noutcome) (Gelman, 2008).}\n\n\\item{weights}{Can be \\code{NULL} (for no weighting), or:\n\\itemize{\n\\item For model: if \\code{TRUE} (default), a weighted-standardization is carried out.\n\\item For \\code{data.frame}s: a numeric vector of weights, or a character of the\nname of a column in the \\code{data.frame} that contains the weights.\n\\item For numeric vectors: a numeric vector of weights.\n}}\n\n\\item{reference}{A data frame or variable from which the centrality and\ndeviation will be computed instead of from the input variable. Useful for\nstandardizing a subset or new data according to another data frame.}\n\n\\item{center, scale}{\\itemize{\n\\item For \\code{standardize()}: \\cr\nNumeric values, which can be used as alternative to \\code{reference} to define\na reference centrality and deviation. If \\code{scale} and \\code{center} are of\nlength 1, they will be recycled to match the length of selected variables\nfor standardization. Else, \\code{center} and \\code{scale} must be of same length as\nthe number of selected variables. Values in \\code{center} and \\code{scale} will be\nmatched to selected variables in the provided order, unless a named vector\nis given. In this case, names are matched against the names of the selected\nvariables.\n\\item For \\code{unstandardize()}: \\cr\n\\code{center} and \\code{scale} correspond to the center (the mean / median) and the scale (SD / MAD) of\nthe original non-standardized data (for data frames, should be named, or\nhave column order correspond to the numeric column). 
However, one can also\ndirectly provide the original data through \\code{reference}, from which the\ncenter and the scale will be computed (according to \\code{robust} and \\code{two_sd}).\nAlternatively, if the input contains the attributes \\code{center} and \\code{scale}\n(as does the output of \\code{standardize()}), it will take it from there if the\nrest of the arguments are absent.\n}}\n\n\\item{verbose}{Toggle warnings and messages on or off.}\n\n\\item{force}{Logical, if \\code{TRUE}, forces recoding of factors and character\nvectors as well.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. 
\\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{remove_na}{How should missing values (\\code{NA}) be treated: if \\code{\"none\"}\n(default): each column's standardization is done separately, ignoring\n\\code{NA}s. Else, rows with \\code{NA} in the columns selected with \\code{select} /\n\\code{exclude} (\\code{\"selected\"}) or in all columns (\\code{\"all\"}) are dropped before\nstandardization, and the resulting data frame does not include these cases.}\n\n\\item{append}{Logical or string. 
If \\code{TRUE}, standardized variables get new\ncolumn names (with the suffix \\code{\"_z\"}) and are appended (column bind) to \\code{x},\nthus returning both the original and the standardized variables. If \\code{FALSE},\noriginal variables in \\code{x} will be overwritten by their standardized versions.\nIf a character value, standardized variables are appended with new column\nnames (using the defined suffix) to the original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nThe standardized object (either a standardize data frame or a\nstatistical model fitted on standardized data).\n}\n\\description{\nPerforms a standardization of data (z-scoring), i.e., centering and scaling,\nso that the data is expressed in terms of standard deviation (i.e., mean = 0,\nSD = 1) or Median Absolute Deviance (median = 0, MAD = 1). When applied to a\nstatistical model, this function extracts the dataset, standardizes it, and\nrefits the model with this standardized version of the dataset. 
The\n\\code{\\link[=normalize]{normalize()}} function can also be used to scale all numeric variables within\nthe 0 - 1 range.\n\\cr\\cr\nFor model standardization, see \\code{\\link[=standardize.default]{standardize.default()}}.\n}\n\\note{\nWhen \\code{x} is a vector or a data frame with \\verb{remove_na = \"none\")},\nmissing values are preserved, so the return value has the same length /\nnumber of rows as the original input.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nd <- iris[1:4, ]\n\n# vectors\nstandardise(d$Petal.Length)\n\n# Data frames\n# overwrite\nstandardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"))\n\n# append\nstandardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = TRUE)\n\n# append, suffix\nstandardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = \"_std\")\n\n# standardizing with reference center and scale\nd <- data.frame(\n  a = c(-2, -1, 0, 1, 2),\n  b = c(3, 4, 5, 6, 7)\n)\n\n# default standardization, based on mean and sd of each variable\nstandardize(d) # means are 0 and 5, sd ~ 1.581139\n\n# standardization, based on mean and sd set to the same values\nstandardize(d, center = c(0, 5), scale = c(1.581, 1.581))\n\n# standardization, mean and sd for each variable newly defined\nstandardize(d, center = c(3, 4), scale = c(2, 4))\n\n# standardization, taking same mean and sd for each variable\nstandardize(d, center = 1, scale = 3)\n}\n\\seealso{\nSee 
\\code{\\link[=center]{center()}} for grand-mean centering of variables, and\n\\code{\\link[=makepredictcall.dw_transformer]{makepredictcall.dw_transformer()}} for use in model formulas.\n\nOther transform utilities: \n\\code{\\link{normalize}()},\n\\code{\\link{ranktransform}()},\n\\code{\\link{rescale}()},\n\\code{\\link{reverse}()}\n\nOther standardize: \n\\code{\\link{standardize.default}()}\n}\n\\concept{standardize}\n\\concept{transform utilities}\n"
  },
  {
    "path": "man/standardize.default.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/standardize.models.R\n\\name{standardize.default}\n\\alias{standardize.default}\n\\alias{standardize_models}\n\\title{Re-fit a model with standardized data}\n\\usage{\n\\method{standardize}{default}(\n  x,\n  robust = FALSE,\n  two_sd = FALSE,\n  weights = TRUE,\n  verbose = TRUE,\n  include_response = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A statistical model.}\n\n\\item{robust}{Logical, if \\code{TRUE}, centering is done by subtracting the\nmedian from the variables and dividing it by the median absolute deviation\n(MAD). If \\code{FALSE}, variables are standardized by subtracting the\nmean and dividing it by the standard deviation (SD).}\n\n\\item{two_sd}{If \\code{TRUE}, the variables are scaled by two times the deviation\n(SD or MAD depending on \\code{robust}). This method can be useful to obtain\nmodel coefficients of continuous parameters comparable to coefficients\nrelated to binary predictors, when applied to \\strong{the predictors} (not the\noutcome) (Gelman, 2008).}\n\n\\item{weights}{If \\code{TRUE} (default), a weighted-standardization is carried out.}\n\n\\item{verbose}{Toggle warnings and messages on or off.}\n\n\\item{include_response}{If \\code{TRUE} (default), the response value will also be\nstandardized. If \\code{FALSE}, only the predictors will be standardized.\n\\itemize{\n\\item Note that for GLMs and models with non-linear link functions, the\nresponse value will not be standardized, to make re-fitting the model work.\n\\item If the model contains an \\code{\\link[stats:offset]{stats::offset()}}, the offset variable(s) will\nbe standardized only if the response is standardized. 
If \\code{two_sd = TRUE},\noffsets are standardized by one-sd (similar to the response).\n\\item (For \\code{mediate} models, the \\code{include_response} refers to the outcome in\nthe y model; m model's response will always be standardized when possible).\n}}\n\n\\item{...}{Arguments passed to or from other methods.}\n}\n\\value{\nA statistical model fitted on standardized data\n}\n\\description{\nPerforms a standardization of data (z-scoring) using\n\\code{\\link[=standardize]{standardize()}} and then re-fits the model to the standardized data.\n\\cr\\cr\nStandardization is done by completely refitting the model on the standardized\ndata. Hence, this approach is equal to standardizing the variables \\emph{before}\nfitting the model and will return a new model object. This method is\nparticularly recommended for complex models that include interactions or\ntransformations (e.g., polynomial or spline terms). The \\code{robust} (default to\n\\code{FALSE}) argument enables a robust standardization of data, based on the\n\\code{median} and the \\code{MAD} instead of the \\code{mean} and the \\code{SD}.\n}\n\\section{Generalized Linear Models}{\nStandardization for generalized linear models (GLM, GLMM, etc) is done only\nwith respect to the predictors (while the outcome remains as-is,\nunstandardized) - maintaining the interpretability of the coefficients (e.g.,\nin a binomial model: the exponent of the standardized parameter is the OR of\na change of 1 SD in the predictor, etc.)\n}\n\n\\section{Dealing with Factors}{\n\\code{standardize(model)} or \\code{standardize_parameters(model, method = \"refit\")} do\n\\emph{not} standardize categorical predictors (i.e. factors) / their\ndummy-variables, which may be a different behaviour compared to other R\npackages (such as \\strong{lm.beta}) or other software packages (like SPSS). 
To\nmimic such behaviours, either use \\code{standardize_parameters(model, method = \"basic\")} to obtain post-hoc standardized parameters, or standardize the data\nwith \\code{standardize(data, force = TRUE)} \\emph{before} fitting the\nmodel.\n}\n\n\\section{Transformed Variables}{\nWhen the model's formula contains transformations (e.g. \\code{y ~ exp(X)}) the\ntransformation effectively takes place after standardization (e.g.,\n\\code{exp(scale(X))}). Since some transformations are undefined for none positive\nvalues, such as \\code{log()} and \\code{sqrt()}, the relevel variables are shifted (post\nstandardization) by \\code{Z - min(Z) + 1} or \\code{Z - min(Z)} (respectively).\n}\n\n\\examples{\nmodel <- lm(Infant.Mortality ~ Education * Fertility, data = swiss)\ncoef(standardize(model))\n\n}\n\\seealso{\nOther standardize: \n\\code{\\link{standardize}()}\n}\n\\concept{standardize}\n"
  },
  {
    "path": "man/text_format.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/text_format.R\n\\name{text_format}\n\\alias{text_format}\n\\alias{text_fullstop}\n\\alias{text_lastchar}\n\\alias{text_concatenate}\n\\alias{text_paste}\n\\alias{text_remove}\n\\alias{text_wrap}\n\\title{Convenient text formatting functionalities}\n\\usage{\ntext_format(\n  text,\n  sep = \", \",\n  last = \" and \",\n  width = NULL,\n  enclose = NULL,\n  ...\n)\n\ntext_fullstop(text)\n\ntext_lastchar(text, n = 1)\n\ntext_concatenate(text, sep = \", \", last = \" and \", enclose = NULL)\n\ntext_paste(text, text2 = NULL, sep = \", \", enclose = NULL, ...)\n\ntext_remove(text, pattern = \"\", ...)\n\ntext_wrap(text, width = NULL, ...)\n}\n\\arguments{\n\\item{text, text2}{A character string.}\n\n\\item{sep}{Separator.}\n\n\\item{last}{Last separator.}\n\n\\item{width}{Positive integer giving the target column width for wrapping\nlines in the output. Can be \"auto\", in which case it will select 90\\\\% of the\ndefault width.}\n\n\\item{enclose}{Character that will be used to wrap elements of \\code{text}, so\nthese can be, e.g., enclosed with quotes or backticks. 
If \\code{NULL} (default),\ntext elements will not be enclosed.}\n\n\\item{...}{Other arguments to be passed to or from other functions.}\n\n\\item{n}{The number of characters to find.}\n\n\\item{pattern}{Regex pattern to remove from \\code{text}.}\n}\n\\value{\nA character string.\n}\n\\description{\nConvenience functions to manipulate and format text.\n}\n\\examples{\n# Add full stop if missing\ntext_fullstop(c(\"something\", \"something else.\"))\n\n# Find last characters\ntext_lastchar(c(\"ABC\", \"DEF\"), n = 2)\n\n# Smart concatenation\ntext_concatenate(c(\"First\", \"Second\", \"Last\"))\ntext_concatenate(c(\"First\", \"Second\", \"Last\"), last = \" or \", enclose = \"`\")\n\n# Remove parts of string\ntext_remove(c(\"one!\", \"two\", \"three!\"), \"!\")\n\n# Wrap text\nlong_text <- paste(rep(\"abc \", 100), collapse = \"\")\ncat(text_wrap(long_text, width = 50))\n\n# Paste with optional separator\ntext_paste(c(\"A\", \"\", \"B\"), c(\"42\", \"42\", \"42\"))\n}\n"
  },
  {
    "path": "man/to_factor.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/to_factor.R\n\\name{to_factor}\n\\alias{to_factor}\n\\alias{to_factor.numeric}\n\\alias{to_factor.data.frame}\n\\title{Convert data to factors}\n\\usage{\nto_factor(x, ...)\n\n\\method{to_factor}{numeric}(x, labels_to_levels = TRUE, verbose = TRUE, ...)\n\n\\method{to_factor}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  ignore_case = FALSE,\n  append = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame or vector.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{labels_to_levels}{Logical, if \\code{TRUE}, value labels are used as factor\nlevels after \\code{x} was converted to factor. Else, factor levels are based on\nthe values of \\code{x} (i.e. as if using \\code{as.factor()}).}\n\n\\item{verbose}{Toggle warnings.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. 
\\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and  \\code{contains()} accept several patterns, e.g\n\\code{starts_with(\"Sep\", \"Petal\")}. \\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. 
If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{append}{Logical or string. If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n}\n\\value{\nA factor, or a data frame of factors.\n}\n\\description{\nConvert data to factors\n}\n\\details{\nConvert variables or data into factors. If the data is labelled, value labels\nwill be used as factor levels. The counterpart to convert variables into\nnumeric is \\code{to_numeric()}.\n}\n\\note{\nFactors are ignored and returned as is. 
If you want to use value labels\nas levels for factors, use \\code{\\link[=labels_to_levels]{labels_to_levels()}} instead.\n}\n\\section{Selection of variables - the \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument (including this function),\nthe complete input data frame is returned, even when \\code{select} only selects\na range of variables. That is, the function is only applied to those variables\nthat have a match in \\code{select}, while all other variables remain unchanged.\nIn other words: for this function, \\code{select} will not omit any non-included\nvariables, so that the returned data frame will include all variables\nfrom the input data frame.\n}\n\n\\examples{\nstr(to_factor(iris))\n\n# use labels as levels\ndata(efc)\nstr(efc$c172code)\nhead(to_factor(efc$c172code))\n}\n"
  },
  {
    "path": "man/to_numeric.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/to_numeric.R\n\\name{to_numeric}\n\\alias{to_numeric}\n\\alias{to_numeric.data.frame}\n\\title{Convert data to numeric}\n\\usage{\nto_numeric(x, ...)\n\n\\method{to_numeric}{data.frame}(\n  x,\n  select = NULL,\n  exclude = NULL,\n  dummy_factors = FALSE,\n  preserve_levels = FALSE,\n  lowest = NULL,\n  append = FALSE,\n  ignore_case = FALSE,\n  regex = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{A data frame, factor or vector.}\n\n\\item{...}{Arguments passed to or from other methods.}\n\n\\item{select}{Variables that will be included when performing the required\ntasks. Can be either\n\\itemize{\n\\item a variable specified as a literal variable name (e.g., \\code{column_name}),\n\\item a string with the variable name (e.g., \\code{\"column_name\"}), a character\nvector of variable names (e.g., \\code{c(\"col1\", \"col2\", \"col3\")}), or a\ncharacter vector of variable names including ranges specified via \\code{:}\n(e.g., \\code{c(\"col1:col3\", \"col5\")}),\n\\item for some functions, like \\code{data_select()} or \\code{data_rename()}, \\code{select} can\nbe a named character vector. In this case, the names are used to rename\nthe columns in the output data frame. See 'Details' in the related\nfunctions to see where this option applies.\n\\item a formula with variable names (e.g., \\code{~column_1 + column_2}),\n\\item a vector of positive integers, giving the positions counting from the left\n(e.g. \\code{1} or \\code{c(1, 3, 5)}),\n\\item a vector of negative integers, giving the positions counting from the\nright (e.g., \\code{-1} or \\code{-1:-3}),\n\\item one of the following select-helpers: \\code{starts_with()}, \\code{ends_with()},\n\\code{contains()}, a range using \\code{:}, or \\code{regex()}. \\code{starts_with()},\n\\code{ends_with()}, and \\code{contains()} accept several patterns, e.g.\n\\code{starts_with(\"Sep\", \"Petal\")}. 
\\code{regex()} can be used to define regular\nexpression patterns.\n\\item a function testing for logical conditions, e.g. \\code{is.numeric()} (or\n\\code{is.numeric}), or any user-defined function that selects the variables\nfor which the function returns \\code{TRUE} (like: \\code{foo <- function(x) mean(x) > 3}),\n\\item ranges specified via literal variable names, select-helpers (except\n\\code{regex()}) and (user-defined) functions can be negated, i.e. return\nnon-matching elements, when prefixed with a \\code{-}, e.g. \\code{-ends_with()},\n\\code{-is.numeric} or \\code{-(Sepal.Width:Petal.Length)}. \\strong{Note:} Negation means\nthat matches are \\emph{excluded}, and thus, the \\code{exclude} argument can be\nused alternatively. For instance, \\code{select=-ends_with(\"Length\")} (with\n\\code{-}) is equivalent to \\code{exclude=ends_with(\"Length\")} (no \\code{-}). In case\nnegation should not work as expected, use the \\code{exclude} argument instead.\n}\n\nIf \\code{NULL}, selects all columns. Patterns that found no matches are silently\nignored, e.g. \\code{extract_column_names(iris, select = c(\"Species\", \"Test\"))}\nwill just return \\code{\"Species\"}.}\n\n\\item{exclude}{See \\code{select}, however, column names matched by the pattern\nfrom \\code{exclude} will be excluded instead of selected. If \\code{NULL} (the default),\nexcludes no columns.}\n\n\\item{dummy_factors}{Transform factors to dummy factors (all factor levels as\ndifferent columns filled with a binary 0-1 value).}\n\n\\item{preserve_levels}{Logical, only applies if \\code{x} is a factor. If \\code{TRUE},\nand \\code{x} has numeric factor levels, these will be converted into the related\nnumeric values. If this is not possible, the converted numeric values will\nstart from 1 to number of levels.}\n\n\\item{lowest}{Numeric, indicating the lowest (minimum) value when converting\nfactors or character vectors to numeric values.}\n\n\\item{append}{Logical or string. 
If \\code{TRUE}, recoded or converted variables\nget new column names and are appended (column bind) to \\code{x}, thus returning\nboth the original and the recoded variables. The new columns get a suffix,\nbased on the calling function: \\code{\"_r\"} for recode functions, \\code{\"_n\"} for\n\\code{to_numeric()}, \\code{\"_f\"} for \\code{to_factor()}, or \\code{\"_s\"} for\n\\code{slide()}. If \\code{append=FALSE}, original variables in \\code{x} will be\noverwritten by their recoded versions. If a character value, recoded\nvariables are appended with new column names (using the defined suffix) to\nthe original data frame.}\n\n\\item{ignore_case}{Logical, if \\code{TRUE} and when one of the select-helpers or\na regular expression is used in \\code{select}, ignores lower/upper case in the\nsearch pattern when matching against variable names.}\n\n\\item{regex}{Logical, if \\code{TRUE}, the search pattern from \\code{select} will be\ntreated as regular expression. When \\code{regex = TRUE}, select \\emph{must} be a\ncharacter string (or a variable containing a character string) and is not\nallowed to be one of the supported select-helpers or a character vector\nof length > 1. \\code{regex = TRUE} is comparable to using one of the two\nselect-helpers, \\code{select = contains()} or \\code{select = regex()}, however,\nsince the select-helpers may not work when called from inside other\nfunctions (see 'Details'), this argument may be used as workaround.}\n\n\\item{verbose}{Toggle warnings.}\n}\n\\value{\nA data frame of numeric variables.\n}\n\\description{\nConvert data to numeric by converting characters to factors and factors to\neither numeric levels or dummy variables. The \"counterpart\" to convert\nvariables into factors is \\code{to_factor()}.\n}\n\\note{\nWhen factors should be converted into multiple \"binary\" dummies, i.e.\neach factor level is converted into a separate column filled with a binary\n0-1 value, set \\code{dummy_factors = TRUE}. 
If you want to preserve the original\nfactor levels (in case these represent numeric values), use\n\\code{preserve_levels = TRUE}.\n}\n\\section{Selection of variables - \\code{select} argument}{\n\nFor most functions that have a \\code{select} argument the complete input data\nframe is returned, even when \\code{select} only selects a range of variables.\nHowever, for \\code{to_numeric()}, factors might be converted into dummies,\nthus, the number of variables of the returned data frame no longer match\nthe input data frame. Hence, when \\code{select} is used, \\emph{only} those variables\n(or their dummies) specified in \\code{select} will be returned. Use \\code{append=TRUE}\nto also include the original variables in the returned data frame.\n}\n\n\\examples{\nto_numeric(head(ToothGrowth))\nto_numeric(head(ToothGrowth), dummy_factors = TRUE)\n\n# factors\nx <- as.factor(mtcars$gear)\nto_numeric(x)\nto_numeric(x, preserve_levels = TRUE)\n# same as:\ncoerce_to_numeric(x)\n\n}\n"
  },
  {
    "path": "man/visualisation_recipe.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/visualisation_recipe.R\n\\name{visualisation_recipe}\n\\alias{visualisation_recipe}\n\\title{Prepare objects for visualisation}\n\\usage{\nvisualisation_recipe(x, ...)\n}\n\\arguments{\n\\item{x}{An \\code{easystats} object.}\n\n\\item{...}{Other arguments passed to other functions.}\n}\n\\description{\nThis function prepares objects for visualisation by returning a list of\nlayers with data and geoms that can be easily plotted using for instance\n\\code{ggplot2}.\n\nIf the \\code{see} package is installed, the call to \\code{visualisation_recipe()} can be\nreplaced by \\code{plot()}, which will internally call the former and then plot it\nusing \\code{ggplot}. The resulting plot can be customized ad-hoc (by adding\nggplot's geoms, theme or specifications), or via some of the arguments\nof \\code{visualisation_recipe()} that control the aesthetic parameters.\n\nSee the specific documentation page for your object's class:\n\\itemize{\n\\item {modelbased}: \\url{https://easystats.github.io/modelbased/reference/visualisation_recipe.estimate_predicted.html}\n\\item {correlation}: \\url{https://easystats.github.io/correlation/reference/visualisation_recipe.easycormatrix.html}\n}\n}\n"
  },
  {
    "path": "man/weighted_mean.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/weighted_mean_median_sd_mad.R\n\\name{weighted_mean}\n\\alias{weighted_mean}\n\\alias{weighted_median}\n\\alias{weighted_sd}\n\\alias{weighted_mad}\n\\title{Weighted Mean, Median, SD, and MAD}\n\\usage{\nweighted_mean(x, weights = NULL, remove_na = TRUE, verbose = TRUE, ...)\n\nweighted_median(x, weights = NULL, remove_na = TRUE, verbose = TRUE, ...)\n\nweighted_sd(x, weights = NULL, remove_na = TRUE, verbose = TRUE, ...)\n\nweighted_mad(\n  x,\n  weights = NULL,\n  constant = 1.4826,\n  remove_na = TRUE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{x}{an object containing the values whose weighted mean is to be\n    computed.}\n\n\\item{weights}{A numerical vector of weights the same length as \\code{x} giving\nthe weights to use for elements of \\code{x}. If \\code{weights = NULL}, \\code{x} is passed\nto the non-weighted function.}\n\n\\item{remove_na}{Logical, if \\code{TRUE} (default), removes missing (\\code{NA}) and infinite\nvalues from \\code{x} and \\code{weights}.}\n\n\\item{verbose}{Show warning when \\code{weights} are negative?}\n\n\\item{...}{arguments to be passed to or from methods.}\n\n\\item{constant}{scale factor.}\n}\n\\description{\nWeighted Mean, Median, SD, and MAD\n}\n\\examples{\n## GPA from Siegel 1994\nx <- c(3.7, 3.3, 3.5, 2.8)\nwt <- c(5, 5, 4, 1) / 15\n\nweighted_mean(x, wt)\nweighted_median(x, wt)\n\nweighted_sd(x, wt)\nweighted_mad(x, wt)\n\n}\n"
  },
  {
    "path": "man/winsorize.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/winsorize.R\n\\name{winsorize}\n\\alias{winsorize}\n\\alias{winsorize.numeric}\n\\title{Winsorize data}\n\\usage{\nwinsorize(data, ...)\n\n\\method{winsorize}{numeric}(\n  data,\n  threshold = 0.2,\n  method = \"percentile\",\n  robust = FALSE,\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{data}{data frame or vector.}\n\n\\item{...}{Currently not used.}\n\n\\item{threshold}{The amount of winsorization, depends on the value of \\code{method}:\n\\itemize{\n\\item For \\code{method = \"percentile\"}: the amount to winsorize from \\emph{each} tail.\nThe value of \\code{threshold} must be between 0 and 0.5 and of length 1.\n\\item For \\code{method = \"zscore\"}: the number of \\emph{SD}/\\emph{MAD}-deviations from the\n\\emph{mean}/\\emph{median} (see \\code{robust}). The value of \\code{threshold} must be greater\nthan 0 and of length 1.\n\\item For \\code{method = \"raw\"}: a vector of length 2 with the lower and upper bound\nfor winsorization.\n}}\n\n\\item{method}{One of \"percentile\" (default), \"zscore\", or \"raw\".}\n\n\\item{robust}{Logical, if TRUE, winsorizing through the \"zscore\" method is\ndone via the median and the median absolute deviation (MAD); if FALSE, via\nthe mean and the standard deviation.}\n\n\\item{verbose}{Not used anymore since \\code{datawizard} 0.6.6.}\n}\n\\value{\nA data frame with winsorized columns or a winsorized vector.\n}\n\\description{\nWinsorize data\n}\n\\details{\nWinsorizing or winsorization is the transformation of statistics by limiting\nextreme values in the statistical data to reduce the effect of possibly\nspurious outliers. The distribution of many statistics can be heavily\ninfluenced by outliers. 
A typical strategy is to set all outliers (values\nbeyond a certain threshold) to a specified percentile of the data; for\nexample, a \\verb{90\\%} winsorization would see all data below the 5th percentile set\nto the 5th percentile, and data above the 95th percentile set to the 95th\npercentile. Winsorized estimators are usually more robust to outliers than\ntheir more standard forms.\n}\n\\examples{\nhist(iris$Sepal.Length, main = \"Original data\")\n\nhist(winsorize(iris$Sepal.Length, threshold = 0.2),\n  xlim = c(4, 8), main = \"Percentile Winsorization\"\n)\n\nhist(winsorize(iris$Sepal.Length, threshold = 1.5, method = \"zscore\"),\n  xlim = c(4, 8), main = \"Mean (+/- SD) Winsorization\"\n)\n\nhist(winsorize(iris$Sepal.Length, threshold = 1.5, method = \"zscore\", robust = TRUE),\n  xlim = c(4, 8), main = \"Median (+/- MAD) Winsorization\"\n)\n\nhist(winsorize(iris$Sepal.Length, threshold = c(5, 7.5), method = \"raw\"),\n  xlim = c(4, 8), main = \"Raw Thresholds\"\n)\n\n# Also works on a data frame:\nwinsorize(iris, threshold = 0.2)\n\n}\n\\seealso{\n\\itemize{\n\\item Add a prefix or suffix to column names: \\code{\\link[=data_addprefix]{data_addprefix()}}, \\code{\\link[=data_addsuffix]{data_addsuffix()}}\n\\item Functions to reorder or remove columns: \\code{\\link[=data_reorder]{data_reorder()}}, \\code{\\link[=data_relocate]{data_relocate()}},\n\\code{\\link[=data_remove]{data_remove()}}\n\\item Functions to reshape, pivot or rotate data frames: \\code{\\link[=data_to_long]{data_to_long()}},\n\\code{\\link[=data_to_wide]{data_to_wide()}}, \\code{\\link[=data_rotate]{data_rotate()}}\n\\item Functions to recode data: \\code{\\link[=rescale]{rescale()}}, \\code{\\link[=reverse]{reverse()}}, \\code{\\link[=categorize]{categorize()}},\n\\code{\\link[=recode_values]{recode_values()}}, \\code{\\link[=slide]{slide()}}\n\\item Functions to standardize, normalize, rank-transform: \\code{\\link[=center]{center()}}, 
\\code{\\link[=standardize]{standardize()}},\n\\code{\\link[=normalize]{normalize()}}, \\code{\\link[=ranktransform]{ranktransform()}}, \\code{\\link[=winsorize]{winsorize()}}\n\\item Split and merge data frames: \\code{\\link[=data_partition]{data_partition()}}, \\code{\\link[=data_merge]{data_merge()}}\n\\item Functions to find or select columns: \\code{\\link[=data_select]{data_select()}}, \\code{\\link[=extract_column_names]{extract_column_names()}}\n\\item Functions to filter rows: \\code{\\link[=data_match]{data_match()}}, \\code{\\link[=data_filter]{data_filter()}}\n}\n}\n"
  },
  {
    "path": "paper/JOSS_files/apa.csl",
    "content": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<style xmlns=\"http://purl.org/net/xbiblio/csl\" class=\"in-text\" version=\"1.0\" demote-non-dropping-particle=\"never\" page-range-format=\"expanded\">\n  <info>\n    <title>American Psychological Association 7th edition</title>\n    <title-short>APA</title-short>\n    <id>http://www.zotero.org/styles/apa</id>\n    <link href=\"http://www.zotero.org/styles/apa\" rel=\"self\"/>\n    <link href=\"http://www.zotero.org/styles/apa-6th-edition\" rel=\"template\"/>\n    <link href=\"https://apastyle.apa.org/style-grammar-guidelines/references/examples\" rel=\"documentation\"/>\n    <author>\n      <name>Brenton M. Wiernik</name>\n      <email>zotero@wiernik.org</email>\n    </author>\n    <category citation-format=\"author-date\"/>\n    <category field=\"psychology\"/>\n    <category field=\"generic-base\"/>\n    <updated>2020-11-04T04:07:19+00:00</updated>\n    <rights license=\"http://creativecommons.org/licenses/by-sa/3.0/\">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>\n  </info>\n  <locale xml:lang=\"en\">\n    <terms>\n      <term name=\"editortranslator\" form=\"short\">\n        <single>ed. &amp; trans.</single>\n        <multiple>eds. 
&amp; trans.</multiple>\n      </term>\n      <term name=\"translator\" form=\"short\">trans.</term>\n      <term name=\"interviewer\" form=\"short\">\n        <single>interviewer</single>\n        <multiple>interviewers</multiple>\n      </term>\n      <term name=\"collection-editor\" form=\"short\">\n        <single>ed.</single>\n        <multiple>eds.</multiple>\n      </term>\n      <term name=\"circa\" form=\"short\">ca.</term>\n      <term name=\"bc\"> B.C.E.</term>\n      <term name=\"ad\"> C.E.</term>\n      <term name=\"letter\">personal communication</term>\n      <term name=\"letter\" form=\"short\">letter</term>\n      <term name=\"issue\" form=\"long\">\n        <single>issue</single>\n        <multiple>issues</multiple>\n      </term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"af\">\n    <terms>\n      <term name=\"letter\">persoonlike kommunikasie</term>\n      <term name=\"letter\" form=\"short\">brief</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ar\">\n    <terms>\n      <term name=\"letter\">اتصال شخصي</term>\n      <term name=\"letter\" form=\"short\">خطاب</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"bg\">\n    <terms>\n      <term name=\"letter\">лична комуникация</term>\n      <term name=\"letter\" form=\"short\">писмо</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ca\">\n    <terms>\n      <term name=\"letter\">comunicació personal</term>\n      <term name=\"letter\" form=\"short\">carta</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"cs\">\n    <terms>\n      <term name=\"letter\">osobní komunikace</term>\n      <term name=\"letter\" form=\"short\">dopis</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"cy\">\n    <terms>\n      <term name=\"letter\">cyfathrebu personol</term>\n      <term name=\"letter\" form=\"short\">llythyr</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"da\">\n    <terms>\n      <term name=\"et-al\">et al.</term>\n      <term name=\"letter\">personlig 
kommunikation</term>\n      <term name=\"letter\" form=\"short\">brev</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"de\">\n    <terms>\n      <term name=\"et-al\">et al.</term>\n      <term name=\"letter\">persönliche Kommunikation</term>\n      <term name=\"letter\" form=\"short\">Brief</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"el\">\n    <terms>\n      <term name=\"letter\">προσωπική επικοινωνία</term>\n      <term name=\"letter\" form=\"short\">επιστολή</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"es\">\n    <terms>\n      <term name=\"from\">de</term>\n      <term name=\"letter\">comunicación personal</term>\n      <term name=\"letter\" form=\"short\">carta</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"et\">\n    <terms>\n      <term name=\"letter\">isiklik suhtlus</term>\n      <term name=\"letter\" form=\"short\">kiri</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"eu\">\n    <terms>\n      <term name=\"letter\">komunikazio pertsonala</term>\n      <term name=\"letter\" form=\"short\">gutuna</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"fa\">\n    <terms>\n      <term name=\"letter\">ارتباط شخصی</term>\n      <term name=\"letter\" form=\"short\">نامه</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"fi\">\n    <terms>\n      <term name=\"letter\">henkilökohtainen viestintä</term>\n      <term name=\"letter\" form=\"short\">kirje</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"fr\">\n    <terms>\n      <term name=\"letter\">communication personnelle</term>\n      <term name=\"letter\" form=\"short\">lettre</term>\n      <term name=\"editor\" form=\"short\">\n        <single>éd.</single>\n        <multiple>éds.</multiple>\n      </term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"he\">\n    <terms>\n      <term name=\"letter\">תקשורת אישית</term>\n      <term name=\"letter\" form=\"short\">מכתב</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"hr\">\n    <terms>\n      <term 
name=\"letter\">osobna komunikacija</term>\n      <term name=\"letter\" form=\"short\">pismo</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"hu\">\n    <terms>\n      <term name=\"letter\">személyes kommunikáció</term>\n      <term name=\"letter\" form=\"short\">levél</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"id\">\n    <terms>\n      <term name=\"letter\">komunikasi pribadi</term>\n      <term name=\"letter\" form=\"short\">surat</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"is\">\n    <terms>\n      <term name=\"letter\">persónuleg samskipti</term>\n      <term name=\"letter\" form=\"short\">bréf</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"it\">\n    <terms>\n      <term name=\"letter\">comunicazione personale</term>\n      <term name=\"letter\" form=\"short\">lettera</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ja\">\n    <terms>\n      <term name=\"letter\">個人的なやり取り</term>\n      <term name=\"letter\" form=\"short\">手紙</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ko\">\n    <terms>\n      <term name=\"letter\">개인 서신</term>\n      <term name=\"letter\" form=\"short\">편지</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"la\">\n    <terms>\n      <term name=\"letter\"/>\n      <term name=\"letter\" form=\"short\">epistula</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"lt\">\n    <terms>\n      <term name=\"letter\">communicationis personalis</term>\n      <term name=\"letter\" form=\"short\"/>\n    </terms>\n  </locale>\n  <locale xml:lang=\"lv\">\n    <terms>\n      <term name=\"letter\">personīga komunikācija</term>\n      <term name=\"letter\" form=\"short\">vēstule</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"mn\">\n    <terms>\n      <term name=\"letter\">хувийн харилцаа холбоо</term>\n      <term name=\"letter\" form=\"short\">захиа</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"nb\">\n    <terms>\n      <term name=\"et-al\">et al.</term>\n      <term 
name=\"letter\">personlig kommunikasjon</term>\n      <term name=\"letter\" form=\"short\">brev</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"nl\">\n    <terms>\n      <term name=\"et-al\">et al.</term>\n      <term name=\"letter\">persoonlijke communicatie</term>\n      <term name=\"letter\" form=\"short\">brief</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"nn\">\n    <terms>\n      <term name=\"et-al\">et al.</term>\n      <term name=\"letter\">personlig kommunikasjon</term>\n      <term name=\"letter\" form=\"short\">brev</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"pl\">\n    <terms>\n      <term name=\"letter\">osobista komunikacja</term>\n      <term name=\"letter\" form=\"short\">list</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"pt\">\n    <terms>\n      <term name=\"letter\">comunicação pessoal</term>\n      <term name=\"letter\" form=\"short\">carta</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ro\">\n    <terms>\n      <term name=\"letter\">comunicare personală</term>\n      <term name=\"letter\" form=\"short\">scrisoare</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"ru\">\n    <terms>\n      <term name=\"letter\">личная переписка</term>\n      <term name=\"letter\" form=\"short\">письмо</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"sk\">\n    <terms>\n      <term name=\"letter\">osobná komunikácia</term>\n      <term name=\"letter\" form=\"short\">list</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"sl\">\n    <terms>\n      <term name=\"letter\">osebna komunikacija</term>\n      <term name=\"letter\" form=\"short\">pismo</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"sr\">\n    <terms>\n      <term name=\"letter\">лична комуникација</term>\n      <term name=\"letter\" form=\"short\">писмо</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"sv\">\n    <terms>\n      <term name=\"letter\">personlig kommunikation</term>\n      <term name=\"letter\" 
form=\"short\">brev</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"th\">\n    <terms>\n      <term name=\"letter\">การสื่อสารส่วนบุคคล</term>\n      <term name=\"letter\" form=\"short\">จดหมาย</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"tr\">\n    <terms>\n      <term name=\"letter\">kişisel iletişim</term>\n      <term name=\"letter\" form=\"short\">mektup</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"uk\">\n    <terms>\n      <term name=\"letter\">особисте спілкування</term>\n      <term name=\"letter\" form=\"short\">лист</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"vi\">\n    <terms>\n      <term name=\"letter\">giao tiếp cá nhân</term>\n      <term name=\"letter\" form=\"short\">thư</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"zh-CN\">\n    <terms>\n      <term name=\"letter\">的私人交流</term>\n      <term name=\"letter\" form=\"short\">信函</term>\n    </terms>\n  </locale>\n  <locale xml:lang=\"zh-TW\">\n    <terms>\n      <term name=\"letter\">私人通訊</term>\n      <term name=\"letter\" form=\"short\">信函</term>\n    </terms>\n  </locale>\n  <!-- General categories of item types:\n       Periodical: article-journal article-magazine article-newspaper post-weblog review review-book\n       Periodical or Booklike: paper-conference\n       Booklike: article book broadcast chapter dataset entry entry-dictionary entry-encyclopedia figure \n                 graphic interview manuscript map motion_picture musical_score pamphlet patent \n                 personal_communication report song speech thesis post webpage\n       Legal: bill legal_case legislation treaty\n  -->\n  <!-- APA references contain four parts: author, date, title, source -->\n  <macro name=\"author-bib\">\n    <names variable=\"composer\" delimiter=\", \">\n      <name name-as-sort-order=\"all\" and=\"symbol\" sort-separator=\", \" initialize-with=\". 
\" delimiter=\", \" delimiter-precedes-last=\"always\"/>\n      <substitute>\n        <names variable=\"author\"/>\n        <names variable=\"illustrator\"/>\n        <names variable=\"director\">\n          <name name-as-sort-order=\"all\" and=\"symbol\" sort-separator=\", \" initialize-with=\". \" delimiter=\", \" delimiter-precedes-last=\"always\"/>\n          <label form=\"long\" prefix=\" (\" suffix=\")\" text-case=\"title\"/>\n        </names>\n        <choose>\n          <if variable=\"container-title\">\n            <choose>\n              <if type=\"book entry entry-dictionary entry-encyclopedia\" match=\"any\">\n                <choose>\n                  <if variable=\"title\">\n                    <group delimiter=\" \">\n                      <text macro=\"title\"/>\n                      <text macro=\"parenthetical\"/>\n                    </group>\n                  </if>\n                  <else>\n                    <text macro=\"title-and-descriptions\"/>\n                  </else>\n                </choose>\n              </if>\n            </choose>\n          </if>\n        </choose>\n        <!-- Test for editortranslator and put that first as that becomes available -->\n        <names variable=\"editor\" delimiter=\", \">\n          <name name-as-sort-order=\"all\" and=\"symbol\" sort-separator=\", \" initialize-with=\". \" delimiter=\", \" delimiter-precedes-last=\"always\"/>\n          <label form=\"short\" prefix=\" (\" suffix=\")\" text-case=\"title\"/>\n        </names>\n        <names variable=\"editorial-director\">\n          <name name-as-sort-order=\"all\" and=\"symbol\" sort-separator=\", \" initialize-with=\". \" delimiter=\", \" delimiter-precedes-last=\"always\"/>\n          <label form=\"short\" prefix=\" (\" suffix=\")\" text-case=\"title\"/>\n        </names>\n        <names variable=\"collection-editor\">\n          <name name-as-sort-order=\"all\" and=\"symbol\" sort-separator=\", \" initialize-with=\". 
\" delimiter=\", \" delimiter-precedes-last=\"always\"/>\n          <label form=\"short\" prefix=\" (\" suffix=\")\" text-case=\"title\"/>\n        </names>\n        <choose>\n          <if variable=\"title\">\n            <group delimiter=\" \">\n              <text macro=\"title\"/>\n              <text macro=\"parenthetical\"/>\n            </group>\n          </if>\n          <else>\n            <text macro=\"title-and-descriptions\"/>\n          </else>\n        </choose>\n      </substitute>\n    </names>\n  </macro>\n  <macro name=\"author-intext\">\n    <choose>\n      <if type=\"bill legal_case legislation treaty\" match=\"any\">\n        <text macro=\"title-intext\"/>\n      </if>\n      <else-if type=\"interview personal_communication\" match=\"any\">\n        <choose>\n          <!-- These variables indicate that the letter is retrievable by the reader. \n                If not, then use the APA in-text-only personal communication format -->\n          <if variable=\"archive container-title DOI publisher URL\" match=\"none\">\n            <group delimiter=\", \">\n              <names variable=\"author\">\n                <name and=\"symbol\" delimiter=\", \" initialize-with=\". \"/>\n                <substitute>\n                  <text macro=\"title-intext\"/>\n                </substitute>\n              </names>\n              <!-- Replace with term=\"personal-communication\" if that becomes available -->\n              <text term=\"letter\"/>\n            </group>\n          </if>\n          <else>\n            <names variable=\"author\" delimiter=\", \">\n              <name form=\"short\" and=\"symbol\" delimiter=\", \" initialize-with=\". 
\"/>\n              <substitute>\n                <text macro=\"title-intext\"/>\n              </substitute>\n            </names>\n          </else>\n        </choose>\n      </else-if>\n      <else>\n        <names variable=\"composer\" delimiter=\", \">\n          <name form=\"short\" and=\"symbol\" delimiter=\", \" initialize-with=\". \"/>\n          <substitute>\n            <names variable=\"author\"/>\n            <names variable=\"illustrator\"/>\n            <names variable=\"director\"/>\n            <choose>\n              <if variable=\"container-title\">\n                <choose>\n                  <if type=\"book entry entry-dictionary entry-encyclopedia\" match=\"any\">\n                    <text macro=\"title-intext\"/>\n                  </if>\n                </choose>\n              </if>\n            </choose>\n            <names variable=\"editor\"/>\n            <names variable=\"editorial-director\"/>\n            <text macro=\"title-intext\"/>\n          </substitute>\n        </names>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"date-bib\">\n    <group delimiter=\" \" prefix=\"(\" suffix=\")\">\n      <choose>\n        <if is-uncertain-date=\"issued\">\n          <text term=\"circa\" form=\"short\"/>\n        </if>\n      </choose>\n      <group>\n        <choose>\n          <if variable=\"issued\">\n            <date variable=\"issued\">\n              <date-part name=\"year\"/>\n            </date>\n            <text variable=\"year-suffix\"/>\n            <choose>\n              <if type=\"article-magazine article-newspaper broadcast interview motion_picture pamphlet personal_communication post post-weblog song speech webpage\" match=\"any\">\n                <!-- Many video and audio examples in manual give full dates. Err on the side of too much information. 
-->\n                <date variable=\"issued\">\n                  <date-part prefix=\", \" name=\"month\"/>\n                  <date-part prefix=\" \" name=\"day\"/>\n                </date>\n              </if>\n              <else-if type=\"paper-conference\">\n                <!-- Capture 'speech' stored as 'paper-conference' -->\n                <choose>\n                  <if variable=\"collection-editor editor editorial-director issue page volume\" match=\"none\">\n                    <date variable=\"issued\">\n                      <date-part prefix=\", \" name=\"month\"/>\n                      <date-part prefix=\" \" name=\"day\"/>\n                    </date>\n                  </if>\n                </choose>\n              </else-if>\n              <!-- Only year: article article-journal book chapter entry entry-dictionary entry-encyclopedia dataset figure graphic \n                   manuscript map musical_score paper-conference[published] patent report review review-book thesis -->\n            </choose>\n          </if>\n          <else-if variable=\"status\">\n            <group>\n              <text variable=\"status\" text-case=\"lowercase\"/>\n              <text variable=\"year-suffix\" prefix=\"-\"/>\n            </group>\n          </else-if>\n          <else>\n            <group>\n              <text term=\"no date\" form=\"short\"/>\n              <text variable=\"year-suffix\" prefix=\"-\"/>\n            </group>\n          </else>\n        </choose>\n      </group>\n    </group>\n  </macro>\n  <macro name=\"date-sort-group\">\n    <choose>\n      <if variable=\"issued\">\n        <text value=\"1\"/>\n      </if>\n      <else-if variable=\"status\">\n        <text value=\"2\"/>\n      </else-if>\n      <else>\n        <text value=\"0\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"date-sort-date\">\n    <choose>\n      <if type=\"article-magazine article-newspaper broadcast interview pamphlet personal_communication post 
post-weblog speech treaty webpage\" match=\"any\">\n        <date variable=\"issued\" form=\"numeric\"/>\n      </if>\n      <else-if type=\"paper-conference\">\n        <!-- Capture 'speech' stored as 'paper-conference' -->\n        <choose>\n          <if variable=\"collection-editor editor editorial-director issue page volume\" match=\"none\">\n            <date variable=\"issued\" form=\"numeric\"/>\n          </if>\n        </choose>\n      </else-if>\n      <else>\n        <date variable=\"issued\" form=\"numeric\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"date-intext\">\n    <choose>\n      <if variable=\"issued\">\n        <group delimiter=\"/\">\n          <group delimiter=\" \">\n            <choose>\n              <if is-uncertain-date=\"original-date\">\n                <text term=\"circa\" form=\"short\"/>\n              </if>\n            </choose>\n            <date variable=\"original-date\">\n              <date-part name=\"year\"/>\n            </date>\n          </group>\n          <group delimiter=\" \">\n            <choose>\n              <if is-uncertain-date=\"issued\">\n                <text term=\"circa\" form=\"short\"/>\n              </if>\n            </choose>\n            <group>\n              <choose>\n                <if type=\"interview personal_communication\" match=\"any\">\n                  <choose>\n                    <if variable=\"archive container-title DOI publisher URL\" match=\"none\">\n                      <!-- These variables indicate that the communication is retrievable by the reader. 
\n                           If not, then use the in-text-only personal communication format -->\n                      <date variable=\"issued\" form=\"text\"/>\n                    </if>\n                    <else>\n                      <date variable=\"issued\">\n                        <date-part name=\"year\"/>\n                      </date>\n                    </else>\n                  </choose>\n                </if>\n                <else>\n                  <date variable=\"issued\">\n                    <date-part name=\"year\"/>\n                  </date>\n                </else>\n              </choose>\n              <text variable=\"year-suffix\"/>\n            </group>\n          </group>\n        </group>\n      </if>\n      <else-if variable=\"status\">\n        <text variable=\"status\" text-case=\"lowercase\"/>\n        <text variable=\"year-suffix\" prefix=\"-\"/>\n      </else-if>\n      <else>\n        <text term=\"no date\" form=\"short\"/>\n        <text variable=\"year-suffix\" prefix=\"-\"/>\n      </else>\n    </choose>\n  </macro>\n  <!-- APA has two description elements following the title:\n       title (parenthetical) [bracketed]  -->\n  <macro name=\"title-and-descriptions\">\n    <choose>\n      <if variable=\"title\">\n        <group delimiter=\" \">\n          <text macro=\"title\"/>\n          <text macro=\"parenthetical\"/>\n          <text macro=\"bracketed\"/>\n        </group>\n      </if>\n      <else>\n        <group delimiter=\" \">\n          <text macro=\"bracketed\"/>\n          <text macro=\"parenthetical\"/>\n        </group>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"title\">\n    <choose>\n      <if type=\"post webpage\" match=\"any\">\n        <!-- Webpages are always italicized -->\n        <text variable=\"title\" font-style=\"italic\"/>\n      </if>\n      <else-if variable=\"container-title\" match=\"any\">\n        <!-- Other types are italicized based on presence of container-title.\n       
      Assume that review and review-book are published in periodicals/blogs,\n             not just on a web page (ex. 69) -->\n        <text variable=\"title\"/>\n      </else-if>\n      <else>\n        <choose>\n          <if type=\"article-journal article-magazine article-newspaper post-weblog review review-book\" match=\"any\">\n            <text variable=\"title\" font-style=\"italic\"/>\n          </if>\n          <else-if type=\"paper-conference\">\n            <choose>\n              <if variable=\"collection-editor editor editorial-director\" match=\"any\">\n                <group delimiter=\": \" font-style=\"italic\">\n                  <text variable=\"title\"/>\n                  <!-- Replace with volume-title as that becomes available -->\n                  <choose>\n                    <if is-numeric=\"volume\" match=\"none\">\n                      <group delimiter=\" \">\n                        <label variable=\"volume\" form=\"short\" text-case=\"capitalize-first\"/>\n                        <text variable=\"volume\"/>\n                      </group>\n                    </if>\n                  </choose>\n                </group>\n              </if>\n              <else>\n                <text variable=\"title\" font-style=\"italic\"/>\n              </else>\n            </choose>\n          </else-if>\n          <else>\n            <group delimiter=\": \" font-style=\"italic\">\n              <text variable=\"title\"/>\n              <!-- Replace with volume-title as that becomes available -->\n              <choose>\n                <if is-numeric=\"volume\" match=\"none\">\n                  <group delimiter=\" \">\n                    <label variable=\"volume\" form=\"short\" text-case=\"capitalize-first\"/>\n                    <text variable=\"volume\"/>\n                  </group>\n                </if>\n              </choose>\n            </group>\n          </else>\n        </choose>\n      </else>\n    </choose>\n  </macro>\n  <macro 
name=\"title-intext\">\n    <choose>\n      <if variable=\"title\" match=\"none\">\n        <text macro=\"bracketed-intext\" prefix=\"[\" suffix=\"]\"/>\n      </if>\n      <else-if type=\"bill\">\n        <!-- If a bill has no number or container-title, assume it is a hearing; italic -->\n        <choose>\n          <if variable=\"number container-title\" match=\"none\">\n            <text variable=\"title\" form=\"short\" font-style=\"italic\" text-case=\"title\"/>\n          </if>\n          <else-if variable=\"title\">\n            <text variable=\"title\" form=\"short\" text-case=\"title\"/>\n          </else-if>\n          <else>\n            <group delimiter=\" \">\n              <text variable=\"genre\"/>\n              <group delimiter=\" \">\n                <choose>\n                  <if variable=\"chapter-number container-title\" match=\"none\">\n                    <!-- Replace with label variable=\"number\" as that becomes available -->\n                    <text term=\"issue\" form=\"short\"/>\n                  </if>\n                </choose>\n                <text variable=\"number\"/>\n              </group>\n            </group>\n          </else>\n        </choose>\n      </else-if>\n      <else-if type=\"legal_case\" match=\"any\">\n        <!-- Cases are italicized -->\n        <text variable=\"title\" font-style=\"italic\"/>\n      </else-if>\n      <else-if type=\"legislation treaty\" match=\"any\">\n        <!-- Legislation and treaties not italicized or quoted -->\n        <text variable=\"title\" form=\"short\" text-case=\"title\"/>\n      </else-if>\n      <else-if type=\"post webpage\" match=\"any\">\n        <!-- Webpages are always italicized -->\n        <text variable=\"title\" form=\"short\" font-style=\"italic\" text-case=\"title\"/>\n      </else-if>\n      <else-if variable=\"container-title\" match=\"any\">\n        <!-- Other types are italicized or quoted based on presence of container-title. As in title macro. 
-->\n        <text variable=\"title\" form=\"short\" quotes=\"true\" text-case=\"title\"/>\n      </else-if>\n      <else>\n        <text variable=\"title\" form=\"short\" font-style=\"italic\" text-case=\"title\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"parenthetical\">\n    <!-- (Secondary contributors; Database location; Genre no. 123; Report Series 123, Version, Edition, Volume, Page) -->\n    <group prefix=\"(\" suffix=\")\">\n      <choose>\n        <if type=\"patent\">\n          <!-- authority: U.S. ; genre: patent ; number: 123,445 -->\n          <group delimiter=\" \">\n            <text variable=\"authority\" form=\"short\"/>\n            <choose>\n              <if variable=\"genre\">\n                <text variable=\"genre\" text-case=\"capitalize-first\"/>\n              </if>\n              <else>\n                <!-- This should be localized -->\n                <text value=\"patent\" text-case=\"capitalize-first\"/>\n              </else>\n            </choose>\n            <group delimiter=\" \">\n              <!-- Replace with label variable=\"number\" if that becomes available -->\n              <text term=\"issue\" form=\"short\" text-case=\"capitalize-first\"/>\n              <text variable=\"number\"/>\n            </group>\n          </group>\n        </if>\n        <else-if type=\"post webpage\" match=\"any\">\n          <!-- For post webpage, container-title is treated as publisher -->\n          <group delimiter=\"; \">\n            <text macro=\"secondary-contributors\"/>\n            <text macro=\"database-location\"/>\n            <text macro=\"number\"/>\n            <text macro=\"locators-booklike\"/>\n          </group>\n        </else-if>\n        <else-if variable=\"container-title\">\n          <group delimiter=\"; \">\n            <text macro=\"secondary-contributors\"/>\n            <choose>\n              <if type=\"broadcast graphic map motion_picture song\" match=\"any\">\n                <!-- For 
audiovisual media, number information comes after title, not container-title -->\n                <text macro=\"number\"/>\n              </if>\n            </choose>\n          </group>\n        </else-if>\n        <else>\n          <group delimiter=\"; \">\n            <text macro=\"secondary-contributors\"/>\n            <text macro=\"database-location\"/>\n            <text macro=\"number\"/>\n            <text macro=\"locators-booklike\"/>\n          </group>\n        </else>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"parenthetical-container\">\n    <choose>\n      <if variable=\"container-title\" match=\"any\">\n        <group prefix=\"(\" suffix=\")\">\n          <group delimiter=\"; \">\n            <text macro=\"database-location\"/>\n            <choose>\n              <if type=\"broadcast graphic map motion_picture song\" match=\"none\">\n                <!-- For audiovisual media, number information comes after title, not container-title -->\n                <text macro=\"number\"/>\n              </if>\n            </choose>\n            <text macro=\"locators-booklike\"/>\n          </group>\n        </group>\n      </if>\n    </choose>\n  </macro>\n  <macro name=\"bracketed\">\n    <!-- [Descriptive information] -->\n    <!-- If there is a number, genre is already printed in macro=\"number\" -->\n    <group prefix=\"[\" suffix=\"]\">\n      <choose>\n        <if variable=\"reviewed-author reviewed-title\" type=\"review review-book\" match=\"any\">\n          <!-- Reviewed item -->\n          <group delimiter=\"; \">\n            <group delimiter=\", \">\n              <group delimiter=\" \">\n                <!-- Assume that genre is entered as 'Review of the book' or similar -->\n                <choose>\n                  <if variable=\"number\" match=\"none\">\n                    <choose>\n                      <if variable=\"genre\">\n                        <text variable=\"genre\" text-case=\"capitalize-first\"/>\n             
         </if>\n                      <else-if variable=\"medium\">\n                        <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                      </else-if>\n                      <else>\n                        <!-- Replace with term=\"review\" as that becomes available -->\n                        <text value=\"Review of\"/>\n                      </else>\n                    </choose>\n                  </if>\n                  <else>\n                    <choose>\n                      <if variable=\"medium\">\n                        <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                      </if>\n                      <else>\n                        <!-- Replace with term=\"review\" as that becomes available -->\n                        <text value=\"Review of\"/>\n                      </else>\n                    </choose>\n                  </else>\n                </choose>\n                <text macro=\"reviewed-title\"/>\n              </group>\n              <names variable=\"reviewed-author\">\n                <label form=\"verb-short\" suffix=\" \"/>\n                <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n              </names>\n            </group>\n            <choose>\n              <if variable=\"genre\" match=\"any\">\n                <choose>\n                  <if variable=\"number\" match=\"none\">\n                    <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                  </if>\n                </choose>\n              </if>\n            </choose>\n          </group>\n        </if>\n        <else-if type=\"thesis\">\n          <!-- Thesis type and institution -->\n          <group delimiter=\"; \">\n            <choose>\n              <if variable=\"number\" match=\"none\">\n                <group delimiter=\", \">\n                  <text variable=\"genre\" text-case=\"capitalize-first\"/>\n                  <choose>\n                    <if variable=\"archive DOI URL\" match=\"any\">\n                      <!-- Include the university in brackets if thesis is published -->\n                      <text variable=\"publisher\"/>\n                    </if>\n                  </choose>\n                </group>\n              </if>\n            </choose>\n            <text variable=\"medium\" text-case=\"capitalize-first\"/>\n          </group>\n        </else-if>\n        <else-if variable=\"interviewer\" type=\"interview\" match=\"any\">\n          <!-- Interview information -->\n          <choose>\n            <if variable=\"title\">\n              <text macro=\"format\"/>\n            </if>\n            <else-if variable=\"genre\">\n              <group delimiter=\"; \">\n                <group delimiter=\" \">\n                  <text variable=\"genre\" text-case=\"capitalize-first\"/>\n                  <group delimiter=\" \">\n                    <text term=\"author\" form=\"verb\"/>\n                    <names variable=\"interviewer\">\n                      <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n                    </names>\n                  </group>\n                </group>\n              </group>\n            </else-if>\n            <else-if variable=\"interviewer\">\n              <group delimiter=\"; \">\n                <names variable=\"interviewer\">\n                  <label form=\"verb\" suffix=\" \" text-case=\"capitalize-first\"/>\n                  <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                </names>\n                <text variable=\"medium\" text-case=\"capitalize-first\"/>\n              </group>\n            </else-if>\n            <else>\n              <text macro=\"format\"/>\n            </else>\n          </choose>\n        </else-if>\n        <else-if type=\"personal_communication\">\n          <!-- Letter information -->\n          <choose>\n            <if variable=\"recipient\">\n              <group delimiter=\"; \">\n                <group delimiter=\" \">\n                  <choose>\n                    <if variable=\"number\" match=\"none\">\n                      <choose>\n                        <if variable=\"genre\">\n                          <text variable=\"genre\" text-case=\"capitalize-first\"/>\n                        </if>\n                        <else-if variable=\"medium\">\n                          <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                        </else-if>\n                        <else>\n                          <text term=\"letter\" form=\"short\" text-case=\"capitalize-first\"/>\n                        </else>\n                      </choose>\n                    </if>\n                    <else>\n                      <choose>\n                        <if variable=\"medium\">\n                          <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                        </if>\n                        <else>\n                          <text term=\"letter\" form=\"short\" 
text-case=\"capitalize-first\"/>\n                        </else>\n                      </choose>\n                    </else>\n                  </choose>\n                  <names variable=\"recipient\" delimiter=\", \">\n                    <label form=\"verb\" suffix=\" \"/>\n                    <name and=\"symbol\" delimiter=\", \"/>\n                  </names>\n                </group>\n                <choose>\n                  <if variable=\"genre\" match=\"any\">\n                    <choose>\n                      <if variable=\"number\" match=\"none\">\n                        <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                      </if>\n                    </choose>\n                  </if>\n                </choose>\n              </group>\n            </if>\n            <else>\n              <text macro=\"format\"/>\n            </else>\n          </choose>\n        </else-if>\n        <else-if variable=\"composer\" type=\"song\" match=\"all\">\n          <!-- Performer of classical music works -->\n          <group delimiter=\"; \">\n            <choose>\n              <if variable=\"number\" match=\"none\">\n                <group delimiter=\" \">\n                  <choose>\n                    <if variable=\"genre\">\n                      <text variable=\"genre\" text-case=\"capitalize-first\"/>\n                      <!-- Replace prefix with performer label as that becomes available -->\n                      <names variable=\"author\" prefix=\"recorded by \">\n                        <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n                      </names>\n                    </if>\n                    <else-if variable=\"medium\">\n                      <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                      <!-- Replace prefix with performer label as that becomes available -->\n                      <names variable=\"author\" prefix=\"recorded by \">\n                        <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                      </names>\n                    </else-if>\n                    <else>\n                      <!-- Replace prefix with performer label as that becomes available -->\n                      <names variable=\"author\" prefix=\"Recorded by \">\n                        <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                      </names>\n                    </else>\n                  </choose>\n                </group>\n              </if>\n              <else>\n                <group delimiter=\" \">\n                  <choose>\n                    <if variable=\"medium\">\n                      <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                      <!-- Replace prefix with performer label as that becomes available -->\n                      <names variable=\"author\" prefix=\"recorded by \">\n                        <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                      </names>\n                    </if>\n                    <else>\n                      <!-- Replace prefix with performer label as that becomes available -->\n                      <names variable=\"author\" prefix=\"Recorded by \">\n                        <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n                      </names>\n                    </else>\n                  </choose>\n                </group>\n              </else>\n            </choose>\n            <choose>\n              <if variable=\"genre\" match=\"any\">\n                <choose>\n                  <if variable=\"number\" match=\"none\">\n                    <text variable=\"medium\" text-case=\"capitalize-first\"/>\n                  </if>\n                </choose>\n              </if>\n            </choose>\n          </group>\n        </else-if>\n        <else-if variable=\"container-title\" match=\"none\">\n          <!-- Other description -->\n          <text macro=\"format\"/>\n        </else-if>\n        <else>\n          <!-- For conference presentations, chapters in reports, software, place bracketed after the container title -->\n          <choose>\n            <if type=\"paper-conference speech\" match=\"any\">\n              <choose>\n                <if variable=\"collection-editor editor editorial-director issue page volume\" match=\"any\">\n                  <text macro=\"format\"/>\n                </if>\n              </choose>\n            </if>\n            <else-if type=\"book\">\n              <choose>\n                <if variable=\"version\" match=\"none\">\n                  <text macro=\"format\"/>\n                </if>\n              </choose>\n            </else-if>\n            <else-if type=\"report\" match=\"none\">\n              <text macro=\"format\"/>\n            </else-if>\n          </choose>\n        </else>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"bracketed-intext\">\n    <group prefix=\"[\" suffix=\"]\">\n      <choose>\n        <if variable=\"reviewed-author reviewed-title\" type=\"review review-book\" match=\"any\">\n          <!-- This should be localized -->\n          <text macro=\"reviewed-title-intext\" prefix=\"Review of \"/>\n        </if>\n        <else-if variable=\"interviewer\" 
type=\"interview\" match=\"any\">\n          <names variable=\"interviewer\">\n            <label form=\"verb\" suffix=\" \" text-case=\"capitalize-first\"/>\n            <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n            <substitute>\n              <text macro=\"format-intext\"/>\n            </substitute>\n          </names>\n        </else-if>\n        <else-if type=\"personal_communication\">\n          <!-- Letter information -->\n          <choose>\n            <if variable=\"recipient\">\n              <group delimiter=\" \">\n                <choose>\n                  <if variable=\"number\" match=\"none\">\n                    <text variable=\"genre\" text-case=\"capitalize-first\"/>\n                  </if>\n                  <else>\n                    <text term=\"letter\" form=\"short\" text-case=\"capitalize-first\"/>\n                  </else>\n                </choose>\n                <names variable=\"recipient\" delimiter=\", \">\n                  <label form=\"verb\" suffix=\" \"/>\n                  <name and=\"symbol\" delimiter=\", \"/>\n                </names>\n              </group>\n            </if>\n            <else>\n              <text macro=\"format-intext\"/>\n            </else>\n          </choose>\n        </else-if>\n        <else>\n          <text macro=\"format-intext\"/>\n        </else>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"bracketed-container\">\n    <group prefix=\"[\" suffix=\"]\">\n      <choose>\n        <if type=\"paper-conference speech\" match=\"any\">\n          <!-- Conference presentations should describe the session [container] in bracketed unless published in a proceedings -->\n          <choose>\n            <if variable=\"collection-editor editor editorial-director issue page volume\" match=\"none\">\n              <text macro=\"format\"/>\n            </if>\n          </choose>\n        </if>\n        <else-if type=\"book\" variable=\"version\" 
match=\"all\">\n          <!-- For entries in mobile app reference works, place bracketed after the container-title -->\n          <text macro=\"format\"/>\n        </else-if>\n        <else-if type=\"report\">\n          <!-- For chapters in reports, place bracketed after the container title -->\n          <text macro=\"format\"/>\n        </else-if>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"secondary-contributors\">\n    <choose>\n      <if type=\"article-journal article-magazine article-newspaper post-weblog review review-book\" match=\"any\">\n        <text macro=\"secondary-contributors-periodical\"/>\n      </if>\n      <else-if type=\"paper-conference\">\n        <choose>\n          <if variable=\"collection-editor editor editorial-director\" match=\"any\">\n            <text macro=\"secondary-contributors-booklike\"/>\n          </if>\n          <else>\n            <text macro=\"secondary-contributors-periodical\"/>\n          </else>\n        </choose>\n      </else-if>\n      <else>\n        <text macro=\"secondary-contributors-booklike\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"secondary-contributors-periodical\">\n    <group delimiter=\"; \">\n      <choose>\n        <if variable=\"title\">\n          <names variable=\"interviewer\" delimiter=\"; \">\n            <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n            <label form=\"short\" prefix=\", \" text-case=\"title\"/>\n          </names>\n        </if>\n      </choose>\n      <names variable=\"translator\" delimiter=\"; \">\n        <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n        <label form=\"short\" prefix=\", \" text-case=\"title\"/>\n      </names>\n    </group>\n  </macro>\n  <macro name=\"secondary-contributors-booklike\">\n    <group delimiter=\"; \">\n      <choose>\n        <if variable=\"title\">\n          <names variable=\"interviewer\">\n            <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n            <label form=\"short\" prefix=\", \" text-case=\"title\"/>\n          </names>\n        </if>\n      </choose>\n      <!-- When editortranslator becomes available, add a test: variable=\"editortranslator\" match=\"none\"; then print translator -->\n      <choose>\n        <if type=\"post webpage\" match=\"none\">\n          <!-- Webpages treat container-title like publisher -->\n          <choose>\n            <if variable=\"container-title\" match=\"none\">\n              <group delimiter=\"; \">\n                <names variable=\"container-author\">\n                  <label form=\"verb-short\" suffix=\" \" text-case=\"title\"/>\n                  <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                </names>\n                <names variable=\"editor translator\" delimiter=\"; \">\n                  <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n                  <label form=\"short\" prefix=\", \" text-case=\"title\"/>\n                </names>\n              </group>\n            </if>\n          </choose>\n        </if>\n        <else>\n          <group delimiter=\"; \">\n            <names variable=\"container-author\">\n              <label form=\"verb-short\" suffix=\" \" text-case=\"title\"/>\n              <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n            </names>\n            <names variable=\"editor translator\" delimiter=\"; \">\n              <name and=\"symbol\" initialize-with=\". \" delimiter=\", \"/>\n              <label form=\"short\" prefix=\", \" text-case=\"title\"/>\n            </names>\n          </group>\n        </else>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"database-location\">\n    <choose>\n      <if variable=\"archive-place\" match=\"none\">\n        <!-- With `archive-place`: physical archives. Without: online archives. 
-->\n        <!-- Add archive_collection as that becomes available -->\n        <text variable=\"archive_location\"/>\n      </if>\n    </choose>\n  </macro>\n  <macro name=\"number\">\n    <choose>\n      <if variable=\"number\">\n        <group delimiter=\", \">\n          <group delimiter=\" \">\n            <text variable=\"genre\" text-case=\"title\"/>\n            <choose>\n              <if is-numeric=\"number\">\n                <!-- Replace with label variable=\"number\" if that becomes available -->\n                <text term=\"issue\" form=\"short\" text-case=\"capitalize-first\"/>\n                <text variable=\"number\"/>\n              </if>\n              <else>\n                <text variable=\"number\"/>\n              </else>\n            </choose>\n          </group>\n          <choose>\n            <if type=\"thesis\">\n              <choose>\n                <!-- Include the university in brackets if thesis is published -->\n                <if variable=\"archive DOI URL\" match=\"any\">\n                  <text variable=\"publisher\"/>\n                </if>\n              </choose>\n            </if>\n          </choose>\n        </group>\n      </if>\n    </choose>\n  </macro>\n  <macro name=\"locators-booklike\">\n    <choose>\n      <if type=\"article-journal article-magazine article-newspaper broadcast interview patent post post-weblog review review-book speech webpage\" match=\"any\"/>\n      <else-if type=\"paper-conference\">\n        <choose>\n          <if variable=\"collection-editor editor editorial-director\" match=\"any\">\n            <group delimiter=\", \">\n              <text macro=\"version\"/>\n              <text macro=\"edition\"/>\n              <text macro=\"volume-booklike\"/>\n            </group>\n          </if>\n        </choose>\n      </else-if>\n      <else>\n        <group delimiter=\", \">\n          <text macro=\"version\"/>\n          <text macro=\"edition\"/>\n          <text 
macro=\"volume-booklike\"/>\n        </group>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"version\">\n    <choose>\n      <if is-numeric=\"version\">\n        <group delimiter=\" \">\n          <!-- replace with label variable=\"version\" if that becomes available -->\n          <text term=\"version\" text-case=\"capitalize-first\"/>\n          <text variable=\"version\"/>\n        </group>\n      </if>\n      <else>\n        <text variable=\"version\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"edition\">\n    <choose>\n      <if is-numeric=\"edition\">\n        <group delimiter=\" \">\n          <number variable=\"edition\" form=\"ordinal\"/>\n          <label variable=\"edition\" form=\"short\"/>\n        </group>\n      </if>\n      <else>\n        <text variable=\"edition\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"volume-booklike\">\n    <group delimiter=\", \">\n      <!-- Report series [ex. 52] -->\n      <choose>\n        <if type=\"report\">\n          <group delimiter=\" \">\n            <text variable=\"collection-title\" text-case=\"title\"/>\n            <text variable=\"collection-number\"/>\n          </group>\n        </if>\n      </choose>\n      <choose>\n        <if variable=\"volume\" match=\"any\">\n          <choose>\n            <!-- Non-numeric volumes are already printed as part of the book title -->\n            <if is-numeric=\"volume\" match=\"none\"/>\n            <else>\n              <group delimiter=\" \">\n                <label variable=\"volume\" form=\"short\" text-case=\"capitalize-first\"/>\n                <number variable=\"volume\" form=\"numeric\"/>\n              </group>\n            </else>\n          </choose>\n        </if>\n        <else>\n          <group>\n            <!-- Replace with label variable=\"number-of-volumes\" if that becomes available -->\n            <text term=\"volume\" form=\"short\" text-case=\"capitalize-first\" suffix=\" \"/>\n            <text 
term=\"page-range-delimiter\" prefix=\"1\"/>\n            <number variable=\"number-of-volumes\" form=\"numeric\"/>\n          </group>\n        </else>\n      </choose>\n      <group delimiter=\" \">\n        <label variable=\"issue\" text-case=\"capitalize-first\"/>\n        <text variable=\"issue\"/>\n      </group>\n      <group delimiter=\" \">\n        <label variable=\"page\" form=\"short\" suffix=\" \"/>\n        <text variable=\"page\"/>\n      </group>\n    </group>\n  </macro>\n  <macro name=\"reviewed-title\">\n    <choose>\n      <if variable=\"reviewed-title\">\n        <!-- Not possible to distinguish TV series episode from other reviewed \n              works [Ex. 69] -->\n        <text variable=\"reviewed-title\" font-style=\"italic\"/>\n      </if>\n      <else>\n        <!-- Assume title is title of reviewed work -->\n        <text variable=\"title\" font-style=\"italic\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"reviewed-title-intext\">\n    <choose>\n      <if variable=\"reviewed-title\">\n        <!-- Not possible to distinguish TV series episode from other reviewed works [Ex. 
69] -->\n        <text variable=\"reviewed-title\" form=\"short\" font-style=\"italic\" text-case=\"title\"/>\n      </if>\n      <else>\n        <!-- Assume title is title of reviewed work -->\n        <text variable=\"title\" form=\"short\" font-style=\"italic\" text-case=\"title\"/>\n      </else>\n    </choose>\n  </macro>\n  <macro name=\"format\">\n    <choose>\n      <if variable=\"genre medium\" match=\"any\">\n        <group delimiter=\"; \">\n          <choose>\n            <if variable=\"number\" match=\"none\">\n              <text variable=\"genre\" text-case=\"capitalize-first\"/>\n            </if>\n          </choose>\n          <text variable=\"medium\" text-case=\"capitalize-first\"/>\n        </group>\n      </if>\n      <!-- Generic labels for specific types -->\n      <!-- These should be localized when possible -->\n      <else-if type=\"dataset\">\n        <text value=\"Data set\"/>\n      </else-if>\n      <else-if type=\"book\" variable=\"version\" match=\"all\">\n        <!-- Replace with type=\"software\" and term=\"software\" as that becomes available -->\n        <text value=\"Computer software\"/>\n      </else-if>\n      <else-if type=\"interview personal_communication\" match=\"any\">\n        <choose>\n          <if variable=\"archive container-title DOI publisher URL\" match=\"none\">\n            <text term=\"letter\" text-case=\"capitalize-first\"/>\n          </if>\n          <else-if type=\"interview\">\n            <text term=\"interview\" text-case=\"capitalize-first\"/>\n          </else-if>\n        </choose>\n      </else-if>\n      <else-if type=\"map\">\n        <text value=\"Map\"/>\n      </else-if>\n    </choose>\n  </macro>\n  <macro name=\"format-intext\">\n    <choose>\n      <if variable=\"genre\" match=\"any\">\n        <text variable=\"genre\" text-case=\"capitalize-first\"/>\n      </if>\n      <else-if variable=\"medium\">\n        <text variable=\"medium\" text-case=\"capitalize-first\"/>\n      </else-if>\n  
    <!-- Generic labels for specific types -->\n      <!-- These should be localized when possible -->\n      <else-if type=\"dataset\">\n        <text value=\"Data set\"/>\n      </else-if>\n      <else-if type=\"book\" variable=\"version\" match=\"all\">\n        <!-- Replace with type=\"software\" and term=\"software\" as that becomes available -->\n        <text value=\"Computer software\"/>\n      </else-if>\n      <else-if type=\"interview personal_communication\" match=\"any\">\n        <choose>\n          <if variable=\"archive container-title DOI publisher URL\" match=\"none\">\n            <text term=\"letter\" text-case=\"capitalize-first\"/>\n          </if>\n          <else-if type=\"interview\">\n            <text term=\"interview\" text-case=\"capitalize-first\"/>\n          </else-if>\n        </choose>\n      </else-if>\n      <else-if type=\"map\">\n        <text value=\"Map\"/>\n      </else-if>\n    </choose>\n  </macro>\n  <!-- APA 'source' element contains four parts:\n       container, event, publisher, access -->\n  <macro name=\"container\">\n    <choose>\n      <if type=\"article-journal article-magazine article-newspaper post-weblog review review-book\" match=\"any\">\n        <!-- Periodical items -->\n        <text macro=\"container-periodical\"/>\n      </if>\n      <else-if type=\"paper-conference\">\n        <!-- Determine if paper-conference is a periodical or booklike -->\n        <choose>\n          <if variable=\"editor editorial-director collection-editor container-author\" match=\"any\">\n            <text macro=\"container-booklike\"/>\n          </if>\n          <else>\n            <text macro=\"container-periodical\"/>\n          </else>\n        </choose>\n      </else-if>\n      <else-if type=\"post webpage\" match=\"none\">\n        <!-- post and webpage treat container-title like publisher -->\n        <text macro=\"container-booklike\"/>\n      </else-if>\n    </choose>\n  </macro>\n  <macro 
name=\"container-periodical\">\n    <group delimiter=\". \">\n      <group delimiter=\", \">\n        <text variable=\"container-title\" font-style=\"italic\" text-case=\"title\"/>\n        <choose>\n          <if variable=\"volume\">\n            <group>\n              <text variable=\"volume\" font-style=\"italic\"/>\n              <text variable=\"issue\" prefix=\"(\" suffix=\")\"/>\n            </group>\n          </if>\n          <else>\n            <text variable=\"issue\" font-style=\"italic\"/>\n          </else>\n        </choose>\n        <choose>\n          <if variable=\"page\">\n            <text variable=\"page\"/>\n          </if>\n          <else>\n            <!-- Ex. 6: Journal article with article number or eLocator -->\n            <!-- This should be localized -->\n            <text variable=\"number\" prefix=\"Article \"/>\n          </else>\n        </choose>\n      </group>\n      <choose>\n        <if variable=\"issued\">\n          <choose>\n            <if variable=\"issue page volume\" match=\"none\">\n              <text variable=\"status\" text-case=\"capitalize-first\"/>\n            </if>\n          </choose>\n        </if>\n      </choose>\n    </group>\n  </macro>\n  <macro name=\"container-booklike\">\n    <choose>\n      <if variable=\"container-title\" match=\"any\">\n        <group delimiter=\" \">\n          <text term=\"in\" text-case=\"capitalize-first\"/>\n          <group delimiter=\", \">\n            <names variable=\"editor translator\" delimiter=\", &amp; \">\n              <!-- Change to editortranslator and move editor to substitute as that becomes available -->\n              <name and=\"symbol\" initialize-with=\". 
\" delimiter=\", \"/>\n              <label form=\"short\" text-case=\"title\" prefix=\" (\" suffix=\")\"/>\n              <substitute>\n                <names variable=\"editorial-director\"/>\n                <names variable=\"collection-editor\"/>\n                <names variable=\"container-author\"/>\n              </substitute>\n            </names>\n            <group delimiter=\": \" font-style=\"italic\">\n              <text variable=\"container-title\"/>\n              <!-- Replace with volume-title as that becomes available -->\n              <choose>\n                <if is-numeric=\"volume\" match=\"none\">\n                  <group delimiter=\" \">\n                    <label variable=\"volume\" form=\"short\" text-case=\"capitalize-first\"/>\n                    <text variable=\"volume\"/>\n                  </group>\n                </if>\n              </choose>\n            </group>\n          </group>\n          <text macro=\"parenthetical-container\"/>\n          <text macro=\"bracketed-container\"/>\n        </group>\n      </if>\n    </choose>\n  </macro>\n  <macro name=\"publisher\">\n    <group delimiter=\"; \">\n      <choose>\n        <if type=\"thesis\">\n          <choose>\n            <if variable=\"archive DOI URL\" match=\"none\">\n              <text variable=\"publisher\"/>\n            </if>\n          </choose>\n        </if>\n        <else-if type=\"post webpage\" match=\"any\">\n          <!-- For websites, treat container title like publisher -->\n          <group delimiter=\"; \">\n            <text variable=\"container-title\" text-case=\"title\"/>\n            <text variable=\"publisher\"/>\n          </group>\n        </else-if>\n        <else-if type=\"paper-conference\">\n          <!-- For paper-conference, don't print publisher if in a journal-like proceedings -->\n          <choose>\n            <if variable=\"collection-editor editor editorial-director\" match=\"any\">\n              <text variable=\"publisher\"/>\n  
          </if>\n          </choose>\n        </else-if>\n        <else-if type=\"article-journal article-magazine article-newspaper post-weblog\" match=\"none\">\n          <text variable=\"publisher\"/>\n        </else-if>\n      </choose>\n      <group delimiter=\", \">\n        <choose>\n          <if variable=\"archive-place\">\n            <!-- With `archive-place`: physical archives. Without: online archives. -->\n            <!-- For physical archives, print the location before the archive name.\n                For electronic archives, these are printed in macro=\"description\". -->\n            <!-- Split \"archive_location\" into \"archive_collection\" and \"archive_location\" as that becomes available -->\n            <!-- Must test for archive_collection:\n                With collection: archive_collection (archive_location), archive, archive-place\n                No collection: archive (archive_location), archive-place\n            -->\n            <text variable=\"archive_location\"/>\n          </if>\n        </choose>\n        <text variable=\"archive\"/>\n        <text variable=\"archive-place\"/>\n      </group>\n    </group>\n  </macro>\n  <macro name=\"access\">\n    <choose>\n      <if variable=\"DOI\" match=\"any\">\n        <text variable=\"DOI\" prefix=\"https://doi.org/\"/>\n      </if>\n      <else-if variable=\"URL\">\n        <group delimiter=\" \">\n          <choose>\n            <if variable=\"issued status\" match=\"none\">\n              <group delimiter=\" \">\n                <text term=\"retrieved\" text-case=\"capitalize-first\"/>\n                <date variable=\"accessed\" form=\"text\" suffix=\",\"/>\n                <text term=\"from\"/>\n              </group>\n            </if>\n          </choose>\n          <text variable=\"URL\"/>\n        </group>\n      </else-if>\n    </choose>\n  </macro>\n  <macro name=\"event\">\n    <choose>\n      <if variable=\"event\">\n        <!-- To prevent Zotero from printing 
event-place due to its double-mapping of all 'place' to\n             both publisher-place and event-place. Remove this 'choose' when that is changed. -->\n        <choose>\n          <if variable=\"collection-editor editor editorial-director issue page volume\" match=\"none\">\n            <!-- Don't print event info if published in a proceedings -->\n            <group delimiter=\", \">\n              <text variable=\"event\"/>\n              <text variable=\"event-place\"/>\n            </group>\n          </if>\n        </choose>\n      </if>\n    </choose>\n  </macro>\n  <!-- After 'source', APA also prints publication history (original publication, reprint info, retraction info) -->\n  <macro name=\"publication-history\">\n    <choose>\n      <if type=\"patent\" match=\"none\">\n        <group prefix=\"(\" suffix=\")\">\n          <choose>\n            <if variable=\"references\">\n              <!-- This provides the option for more elaborate description \n                   of publication history, such as full \"reprinted\" references\n                   (examples 11, 43, 44) or retracted references -->\n              <text variable=\"references\"/>\n            </if>\n            <else>\n              <group delimiter=\" \">\n                <text value=\"Original work published\"/>\n                <choose>\n                  <if is-uncertain-date=\"original-date\">\n                    <text term=\"circa\" form=\"short\"/>\n                  </if>\n                </choose>\n                <date variable=\"original-date\">\n                  <date-part name=\"year\"/>\n                </date>\n              </group>\n            </else>\n          </choose>\n        </group>\n      </if>\n      <else>\n        <text variable=\"references\" prefix=\"(\" suffix=\")\"/>\n      </else>\n    </choose>\n  </macro>\n  <!-- Legal citations have their own rules -->\n  <macro name=\"legal-cites\">\n    <choose>\n      <if type=\"legal_case\">\n        <group 
delimiter=\". \">\n          <group delimiter=\", \">\n            <text variable=\"title\"/>\n            <group delimiter=\" \">\n              <text macro=\"container-legal\"/>\n              <text macro=\"date-legal\"/>\n            </group>\n            <text variable=\"references\"/>\n          </group>\n          <text macro=\"access\"/>\n        </group>\n      </if>\n      <else-if type=\"bill\">\n        <!-- Currently designed to handle bills, resolutions, hearings, federal reports. -->\n        <group delimiter=\". \">\n          <group delimiter=\", \">\n            <choose>\n              <if variable=\"number container-title\" match=\"none\">\n                <!-- If no number or container-title, then assume it is a hearing -->\n                <text variable=\"title\" font-style=\"italic\"/>\n              </if>\n              <else>\n                <text variable=\"title\"/>\n              </else>\n            </choose>\n            <group delimiter=\" \">\n              <text macro=\"container-legal\"/>\n              <text macro=\"date-legal\"/>\n              <choose>\n                <if variable=\"number container-title\" match=\"none\">\n                  <!-- If no number or container-title, then assume it is a hearing -->\n                  <names variable=\"author\" prefix=\"(testimony of \" suffix=\")\">\n                    <name and=\"symbol\" delimiter=\", \"/>\n                  </names>\n                </if>\n                <else>\n                  <text variable=\"status\" prefix=\"(\" suffix=\")\"/>\n                </else>\n              </choose>\n            </group>\n            <text variable=\"references\"/>\n          </group>\n          <text macro=\"access\"/>\n        </group>\n      </else-if>\n      <else-if type=\"legislation\">\n        <!-- Currently designed to handle statutes, codified regulations, executive orders.\n             For uncodified regulations, assume future code section is in status. 
-->\n        <group delimiter=\". \">\n          <group delimiter=\", \">\n            <text variable=\"title\"/>\n            <group delimiter=\" \">\n              <text macro=\"container-legal\"/>\n              <text macro=\"date-legal\"/>\n              <text variable=\"status\" prefix=\"(\" suffix=\")\"/>\n            </group>\n            <text variable=\"references\"/>\n          </group>\n          <text macro=\"access\"/>\n        </group>\n      </else-if>\n      <else-if type=\"treaty\">\n        <!-- APA generally defers to Bluebook for legal citations, but diverges without\n             explanation for treaty items. The Bluebook format that was used in APA 6th\n             ed. is used here. -->\n        <group delimiter=\", \">\n          <text variable=\"title\" text-case=\"title\"/>\n          <names variable=\"author\">\n            <name initialize-with=\".\" form=\"short\" delimiter=\"-\"/>\n          </names>\n          <text macro=\"date-legal\"/>\n          <text macro=\"container-legal\"/>\n          <text macro=\"access\"/>\n        </group>\n      </else-if>\n    </choose>\n  </macro>\n  <macro name=\"date-legal\">\n    <choose>\n      <if type=\"legal_case\">\n        <group prefix=\"(\" suffix=\")\" delimiter=\" \">\n          <text variable=\"authority\"/>\n          <choose>\n            <if variable=\"container-title\" match=\"any\">\n              <!-- Print only year for cases published in reporters-->\n              <date variable=\"issued\" form=\"numeric\" date-parts=\"year\"/>\n            </if>\n            <else>\n              <date variable=\"issued\" form=\"text\"/>\n            </else>\n          </choose>\n        </group>\n      </if>\n      <else-if type=\"bill legislation\" match=\"any\">\n        <group prefix=\"(\" suffix=\")\" delimiter=\" \">\n          <group delimiter=\" \">\n            <date variable=\"original-date\">\n              <date-part name=\"year\"/>\n            </date>\n            <text 
term=\"and\" form=\"symbol\"/>\n          </group>\n          <date variable=\"issued\">\n            <date-part name=\"year\"/>\n          </date>\n        </group>\n      </else-if>\n      <else-if type=\"treaty\">\n        <date variable=\"issued\" form=\"text\"/>\n      </else-if>\n    </choose>\n  </macro>\n  <macro name=\"container-legal\">\n    <!-- Expect legal item container-titles to be stored in short form -->\n    <choose>\n      <if type=\"legal_case\">\n        <group delimiter=\" \">\n          <choose>\n            <if variable=\"container-title\">\n              <group delimiter=\" \">\n                <text variable=\"volume\"/>\n                <text variable=\"container-title\"/>\n                <group delimiter=\" \">\n                  <!-- Change to label variable=\"section\" as that becomes available -->\n                  <text term=\"section\" form=\"symbol\"/>\n                  <text variable=\"section\"/>\n                </group>\n                <choose>\n                  <if variable=\"page page-first\" match=\"any\">\n                    <text variable=\"page-first\"/>\n                  </if>\n                  <else>\n                    <text value=\"___\"/>\n                  </else>\n                </choose>\n              </group>\n            </if>\n            <else>\n              <group delimiter=\" \">\n                <choose>\n                  <if is-numeric=\"number\">\n                    <!-- Replace with label variable=\"number\" if that becomes available -->\n                    <text term=\"issue\" form=\"short\" text-case=\"capitalize-first\"/>\n                  </if>\n                </choose>\n                <text variable=\"number\"/>\n              </group>\n            </else>\n          </choose>\n        </group>\n      </if>\n      <else-if type=\"bill\">\n        <group delimiter=\", \">\n          <group delimiter=\" \">\n            <text variable=\"genre\"/>\n            <group delimiter=\" 
\">\n              <choose>\n                <if variable=\"chapter-number container-title\" match=\"none\">\n                  <!-- Replace with label variable=\"number\" as that becomes available -->\n                  <text term=\"issue\" form=\"short\"/>\n                </if>\n              </choose>\n              <text variable=\"number\"/>\n            </group>\n          </group>\n          <text variable=\"authority\"/>\n          <text variable=\"chapter-number\"/>\n          <group delimiter=\" \">\n            <text variable=\"volume\"/>\n            <text variable=\"container-title\"/>\n            <text variable=\"page-first\"/>\n          </group>\n        </group>\n      </else-if>\n      <else-if type=\"legislation\">\n        <choose>\n          <if variable=\"number\">\n            <!--There's a public law number-->\n            <group delimiter=\", \">\n              <text variable=\"number\" prefix=\"Pub. L. No. \"/>\n              <group delimiter=\" \">\n                <text variable=\"volume\"/>\n                <text variable=\"container-title\"/>\n                <text variable=\"page-first\"/>\n              </group>\n            </group>\n          </if>\n          <else>\n            <group delimiter=\" \">\n              <text variable=\"volume\"/>\n              <text variable=\"container-title\"/>\n              <choose>\n                <if variable=\"section\">\n                  <group delimiter=\" \">\n                    <!-- Change to label variable=\"section\" as that becomes available -->\n                    <text term=\"section\" form=\"symbol\"/>\n                    <text variable=\"section\"/>\n                  </group>\n                </if>\n                <else>\n                  <text variable=\"page-first\"/>\n                </else>\n              </choose>\n            </group>\n          </else>\n        </choose>\n      </else-if>\n      <else-if type=\"treaty\">\n        <group delimiter=\" \">\n          
<number variable=\"volume\"/>\n          <text variable=\"container-title\"/>\n          <choose>\n            <if variable=\"page page-first\" match=\"any\">\n              <text variable=\"page-first\"/>\n            </if>\n            <else>\n              <group delimiter=\" \">\n                <!-- Replace with label variable=\"number\" if that becomes available -->\n                <text term=\"issue\" form=\"short\" text-case=\"capitalize-first\"/>\n                <text variable=\"number\"/>\n              </group>\n            </else>\n          </choose>\n        </group>\n      </else-if>\n    </choose>\n  </macro>\n  <macro name=\"citation-locator\">\n    <group delimiter=\" \">\n      <choose>\n        <if locator=\"chapter\">\n          <label variable=\"locator\" text-case=\"capitalize-first\"/>\n        </if>\n        <else>\n          <label variable=\"locator\" form=\"short\"/>\n        </else>\n      </choose>\n      <text variable=\"locator\"/>\n    </group>\n  </macro>\n  <citation et-al-min=\"3\" et-al-use-first=\"1\" disambiguate-add-year-suffix=\"true\" disambiguate-add-names=\"true\" disambiguate-add-givenname=\"true\" collapse=\"year\" givenname-disambiguation-rule=\"primary-name\">\n    <sort>\n      <key macro=\"author-bib\" names-min=\"3\" names-use-first=\"1\"/>\n      <key macro=\"date-sort-group\"/>\n      <key macro=\"date-sort-date\" sort=\"ascending\"/>\n      <key variable=\"status\"/>\n    </sort>\n    <layout prefix=\"(\" suffix=\")\" delimiter=\"; \">\n      <group delimiter=\", \">\n        <text macro=\"author-intext\"/>\n        <text macro=\"date-intext\"/>\n        <text macro=\"citation-locator\"/>\n      </group>\n    </layout>\n  </citation>\n  <bibliography hanging-indent=\"true\" et-al-min=\"21\" et-al-use-first=\"19\" et-al-use-last=\"true\" entry-spacing=\"0\" line-spacing=\"2\">\n    <sort>\n      <key macro=\"author-bib\"/>\n      <key macro=\"date-sort-group\"/>\n      <key macro=\"date-sort-date\" 
sort=\"ascending\"/>\n      <key variable=\"status\"/>\n      <key macro=\"title\"/>\n    </sort>\n    <layout>\n      <choose>\n        <if type=\"bill legal_case legislation treaty\" match=\"any\">\n          <!-- Legal items have different orders and delimiters -->\n          <choose>\n            <if variable=\"DOI URL\" match=\"any\">\n              <text macro=\"legal-cites\"/>\n            </if>\n            <else>\n              <text macro=\"legal-cites\" suffix=\".\"/>\n            </else>\n          </choose>\n        </if>\n        <else>\n          <group delimiter=\" \">\n            <group delimiter=\". \" suffix=\".\">\n              <text macro=\"author-bib\"/>\n              <text macro=\"date-bib\"/>\n              <text macro=\"title-and-descriptions\"/>\n              <text macro=\"container\"/>\n              <text macro=\"event\"/>\n              <text macro=\"publisher\"/>\n            </group>\n            <text macro=\"access\"/>\n            <text macro=\"publication-history\"/>\n          </group>\n        </else>\n      </choose>\n    </layout>\n  </bibliography>\n</style>\n"
  },
  {
    "path": "paper/JOSS_files/paper.Rmd",
    "content": "---\ntitle: \"datawizard: An R Package for Easy Data Preparation and Statistical Transformations\"\ntags:\n  - R\n  - easystats\nauthors:\n- affiliation: 1\n  name: Indrajeet Patil\n  orcid: 0000-0003-1995-6531\n- affiliation: 2\n  name: Dominique Makowski\n  orcid: 0000-0001-5375-9967\n- affiliation: 3\n  name: Mattan S. Ben-Shachar\n  orcid: 0000-0002-4287-4801\n- affiliation: 4\n  name: Brenton M. Wiernik^[Brenton Wiernik is currently an independent researcher and Research Scientist at Meta, Demography and Survey Science. The current work was done in an independent capacity.]\n  orcid: 0000-0001-9560-6336\n- affiliation: 5\n  name: Etienne Bacher\n  orcid: 0000-0002-9271-5075 \n- affiliation: 6\n  name: Daniel Lüdecke\n  orcid: 0000-0002-8895-3206\n  \naffiliations:\n- index: 1\n  name: cynkra Analytics GmbH, Germany\n- index: 2\n  name: Nanyang Technological University, Singapore\n- index: 3\n  name: Ben-Gurion University of the Negev, Israel\n- index: 4\n  name: Independent Researcher\n- index: 5\n  name: Luxembourg Institute of Socio-Economic Research (LISER), Luxembourg\n- index: 6\n  name: University Medical Center Hamburg-Eppendorf, Germany\n    \ndate: \"`r Sys.Date()`\"\nbibliography: paper.bib\noutput: rticles::joss_article\ncsl: apa.csl\njournal: JOSS\nlink-citations: yes\n---\n\n```{r, warning=FALSE, message=FALSE, echo=FALSE}\nknitr::opts_chunk$set(\n  collapse = TRUE,\n  out.width = \"100%\",\n  dpi = 300,\n  comment = \"#>\",\n  message = FALSE,\n  warning = FALSE\n)\n\nlibrary(datawizard)\nset.seed(2016)\n```\n\n# Summary\n\nThe `{datawizard}` package for the R programming language [@base2021] provides a lightweight toolbox to assist in key steps involved in any data analysis workflow: (1) wrangling the raw data to get it in the needed form, (2) applying preprocessing steps and statistical transformations, and (3) compute statistical summaries of data properties and distributions. 
Therefore, it can be a valuable tool for R users and developers looking for a lightweight option for data preparation.\n\n# Statement of Need\n\nThe `{datawizard}` package is part of `{easystats}`, a collection of R packages designed to make statistical analysis easier (@Ben-Shachar2020, @Lüdecke2020parameters, @Lüdecke2020performance, @Lüdecke2021see, @Lüdecke2019, @Makowski2019, @Makowski2020). As this ecosystem follows a \"0-external-hard-dependency\" policy, a data manipulation package that relies only on base R needed to be created. In effect, `{datawizard}` provides a data processing backend for this entire ecosystem. \nIn addition to its usefulness to the `{easystats}` ecosystem, it also provides *an* option for R users and package developers if they wish to keep their (recursive) dependency weight to a minimum (for other options, see @Dowle2021, @Eastwood2021).\n\nBecause `{datawizard}` is also meant to be used and adopted easily by a wide range of users, its workflow and syntax are designed to be similar to `{tidyverse}` [@Wickham2019], a widely used ecosystem of R packages. Thus, users familiar with the `{tidyverse}` can easily translate their knowledge and make full use of `{datawizard}`.\n\nIn addition to being a lightweight solution to clean messy data, `{datawizard}` also provides helpers for the other important step of data analysis: applying statistical transformations to the cleaned data while setting up statistical models. This includes various types of data standardization, normalization, rank-transformation, and adjustment. These transformations, although widely used, are not currently collectively implemented in a package in the R ecosystem, so `{datawizard}` can help new R users in finding the transformation they need.\n\nLastly, `{datawizard}` also provides a toolbox to create detailed summaries of data properties and distributions (e.g., tables of descriptive statistics for each variable). 
This is a common step in data analysis, but it is not available in base R or many modeling packages, so its inclusion makes `{datawizard}` a one-stop-shop for data preparation tasks.\n\n# Features\n\n## Data Preparation\n\nThe raw data is rarely in a state that it can be directly fed into a statistical model. It often needs to be modified in various ways. For example, columns need to be renamed or reshaped, certain portions of the data need to be filtered out, data scattered across multiple tables needs to be joined, etc. \n\n`{datawizard}` provides various functions for cleaning and preparing data (see Table 1).\n\n| Function         | Operation                             |\n| :--------------- | :------------------------------------ |\n| `data_filter()`  | to select only certain *observations* |\n| `data_select()`  | to select only certain *variables*    |\n| `data_extract()` | to extract a single *variable*        |\n| `data_rename()`  | to rename variables                   |\n| `data_to_long()` | to convert data from wide to long     |\n| `data_to_wide()` | to convert data from long to wide     |\n| `data_join()`    | to join two data frames               |\n| ...              | ...                                   |\n\nTable: The table below lists a few key functions offered by `{datawizard}` for data wrangling. 
To see the full list, see the package website: <https://easystats.github.io/datawizard/>\n\nWe will look at one example function that converts data in wide format to tidy/long format:\n\n```{r}\nstocks <- data.frame(\n  time = as.Date(\"2009-01-01\") + 0:4,\n  X = rnorm(5, 0, 1),\n  Y = rnorm(5, 0, 2)\n)\n\nstocks\n\ndata_to_long(\n  stocks,\n  select = -c(\"time\"),\n  names_to = \"stock\",\n  values_to = \"price\"\n)\n```\n\n## Statistical Transformations\n\nEven after getting the raw data in the needed format, we may need to transform certain variables further to meet requirements imposed by a statistical test.\n\n`{datawizard}` provides a rich collection of such functions for transforming variables (see Table 2).\n\n| Function          | Operation                                    |\n| :---------------- | :------------------------------------------- |\n| `standardize()`   | to center and scale data                     |\n| `normalize()`     | to scale variables to 0-1 range              |\n| `adjust()`        | to adjust data for effect of other variables |\n| `slide()`         | to shift numeric value range                 |\n| `ranktransform()` | to convert numeric values to integer ranks   |\n| ...               | ...                                          |\n\nTable: The table below lists a few key functions offered by `{datawizard}` for data transformations. To see the full list, see the package website: <https://easystats.github.io/datawizard/>\n\nWe will look at one example function that standardizes (i.e. 
centers and scales) data so that it can be expressed in terms of standard deviation:\n\n```{r}\nd <- data.frame(\n  a = c(-2, -1, 0, 1, 2),\n  b = c(3, 4, 5, 6, 7)\n)\n\nstandardize(d, center = c(3, 4), scale = c(2, 4))\n```\n\n## Summaries of Data Properties and Distributions\n\nThe workhorse function to get a comprehensive summary of data properties is `describe_distribution()`, which combines a set of indices (e.g., measures of centrality, dispersion, range, skewness, kurtosis, etc.) computed by other functions in `{datawizard}`.\n\n```{r eval=FALSE}\ndescribe_distribution(mtcars)\n```\n\n```{r echo=FALSE, eval=TRUE, results=\"asis\"}\nlibrary(kableExtra)\noptions(digits = 2)\nkbl(describe_distribution(mtcars), format = \"latex\", booktabs = TRUE, linesep = \"\") \n```\n\n# Licensing and Availability\n\n`{datawizard}` is licensed under the GNU General Public License (v3.0), with all source code openly developed and stored on GitHub (<https://github.com/easystats/datawizard>), along with a corresponding issue tracker for bug reporting and feature enhancements. In the spirit of honest and open science, we encourage requests, tips for fixes, feature updates, as well as general questions and concerns via direct interaction with contributors and developers.\n\n# Acknowledgments\n\n`{datawizard}` is part of the collaborative [*easystats*](https://easystats.github.io/easystats/) ecosystem. Thus, we thank the [members of easystats](https://github.com/orgs/easystats/people) as well as the users.\n\n# References\n"
  },
  {
    "path": "paper/JOSS_files/paper.bib",
    "content": "@Article{Ben-Shachar2020,\n    title = {{e}ffectsize: Estimation of Effect Size Indices and Standardized Parameters},\n    author = {Mattan S. Ben-Shachar and Daniel Lüdecke and Dominique Makowski},\n    year = {2020},\n    journal = {Journal of Open Source Software},\n    volume = {5},\n    number = {56},\n    pages = {2815},\n    publisher = {The Open Journal},\n    doi = {10.21105/joss.02815},\n    url = {https://doi.org/10.21105/joss.02815},\n  }\n\n\n@Article{Lüdecke2020parameters,\n    title = {Extracting, Computing and Exploring the Parameters of Statistical Models using {R}.},\n    volume = {5},\n    doi = {10.21105/joss.02445},\n    number = {53},\n    journal = {Journal of Open Source Software},\n    author = {Daniel Lüdecke and Mattan S. Ben-Shachar and Indrajeet Patil and Dominique Makowski},\n    year = {2020},\n    pages = {2445},\n  }\n\n @Article{Lüdecke2021see,\n    title = {{see}: An {R} Package for Visualizing Statistical Models},\n    author = {Daniel Lüdecke and Indrajeet Patil and Mattan S. Ben-Shachar and Brenton M. Wiernik and Philip Waggoner and Dominique Makowski},\n    journal = {Journal of Open Source Software},\n    year = {2021},\n    volume = {6},\n    number = {64},\n    pages = {3393},\n    doi = {10.21105/joss.03393},\n  }\n\n@Article{Lüdecke2020performance,\n    title = {{performance}: An {R} Package for Assessment, Comparison and Testing of Statistical Models},\n    author = {Daniel Lüdecke and Mattan S. 
Ben-Shachar and Indrajeet Patil and Philip Waggoner and Dominique Makowski},\n    year = {2021},\n    journal = {Journal of Open Source Software},\n    volume = {6},\n    number = {60},\n    pages = {3139},\n    doi = {10.21105/joss.03139},\n  }\n\n\n@Article{Lüdecke2019,\n    title = {{insight}: A Unified Interface to Access Information from Model Objects in {R}.},\n    volume = {4},\n    doi = {10.21105/joss.01412},\n    number = {38},\n    journal = {Journal of Open Source Software},\n    author = {Daniel Lüdecke and Philip Waggoner and Dominique Makowski},\n    year = {2019},\n    pages = {1412},\n  }\n\n\n@Article{Makowski2020,\n    title = {Methods and Algorithms for Correlation Analysis in {R}.},\n    author = {Dominique Makowski and Mattan S. Ben-Shachar and Indrajeet Patil and Daniel Lüdecke},\n    doi = {10.21105/joss.02306},\n    year = {2020},\n    journal = {Journal of Open Source Software},\n    number = {51},\n    volume = {5},\n    pages = {2306},\n    url = {https://joss.theoj.org/papers/10.21105/joss.02306},\n  }\n\n@Article{Patil2021,\n    doi = {10.21105/joss.03167},\n    url = {https://doi.org/10.21105/joss.03167},\n    year = {2021},\n    publisher = {{The Open Journal}},\n    volume = {6},\n    number = {61},\n    pages = {3167},\n    author = {Indrajeet Patil},\n    title = {{Visualizations with statistical details: The {'ggstatsplot'} approach}},\n    journal = {{Journal of Open Source Software}},\n  }\n\n@Article{Makowski2019,\n    title = {{bayestestR}: Describing Effects and their Uncertainty, Existence and Significance within the {B}ayesian Framework.},\n    author = {Dominique Makowski and Mattan S. 
Ben-Shachar and Daniel Lüdecke},\n    journal = {Journal of Open Source Software},\n    doi = {10.21105/joss.01541},\n    year = {2019},\n    number = {40},\n    volume = {4},\n    pages = {1541},\n    url = {https://joss.theoj.org/papers/10.21105/joss.01541},\n  }\n\n@Article{Wickham2019,\n    title = {Welcome to the {tidyverse}},\n    author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},\n    year = {2019},\n    journal = {Journal of Open Source Software},\n    volume = {4},\n    number = {43},\n    pages = {1686},\n    doi = {10.21105/joss.01686},\n  }\n\n@Article{Makowski2020modelbased,\n    title = {Estimation of Model-Based Predictions, Contrasts and Means.},\n    author = {Dominique Makowski and Mattan S. 
Ben-Shachar and Indrajeet Patil and Daniel Lüdecke},\n    journal = {CRAN},\n    year = {2020},\n    url = {https://github.com/easystats/modelbased},\n  }\n\n@Manual{base2021,\n    title = {{R}: A Language and Environment for Statistical Computing},\n    author = {{R Core Team}},\n    organization = {R Foundation for Statistical Computing},\n    address = {Vienna, Austria},\n    year = {2021},\n    url = {https://www.R-project.org/},\n  }\n\n  @Manual{Eastwood2021,\n    title = {poorman: A Poor Man's Dependency Free Recreation of 'dplyr'},\n    author = {Nathan Eastwood},\n    year = {2021},\n    note = {R package version 0.2.5},\n    url = {https://CRAN.R-project.org/package=poorman},\n  }\n\n@Manual{Dowle2021,\n    title = {data.table: Extension of `data.frame`},\n    author = {Matt Dowle and Arun Srinivasan},\n    year = {2021},\n    note = {R package version 1.14.2},\n    url = {https://CRAN.R-project.org/package=data.table},\n  }\n"
  },
  {
    "path": "paper/JOSS_files/paper.log",
    "content": "This is XeTeX, Version 3.141592653-2.6-0.999994 (TeX Live 2022) (preloaded format=xelatex 2022.9.27)  4 OCT 2022 17:54\nentering extended mode\n restricted \\write18 enabled.\n %&-line parsing enabled.\n**paper.tex\n(./paper.tex\nLaTeX2e <2022-06-01> patch level 5\nL3 programming layer <2022-08-30> (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/article.cls\nDocument Class: article 2021/10/04 v1.4n Standard LaTeX document class\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/size10.clo\nFile: size10.clo 2021/10/04 v1.4n Standard LaTeX file (size option)\n)\n\\c@part=\\count181\n\\c@section=\\count182\n\\c@subsection=\\count183\n\\c@subsubsection=\\count184\n\\c@paragraph=\\count185\n\\c@subparagraph=\\count186\n\\c@figure=\\count187\n\\c@table=\\count188\n\\abovecaptionskip=\\skip47\n\\belowcaptionskip=\\skip48\n\\bibindent=\\dimen138\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/marginnote/marginnote.sty\nPackage: marginnote 2018/08/09 v1.4b non floating margin notes for LaTeX\n\\c@mn@abspage=\\count189\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphicx.sty\nPackage: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/keyval.sty\nPackage: keyval 2022/05/29 v1.15 key=value parser (DPC)\n\\KV@toks@=\\toks16\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/graphics.sty\nPackage: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/trig.sty\nPackage: trig 2021/08/11 v1.11 sin cos tan (DPC)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/graphics.cfg\nFile: graphics.cfg 2016/06/04 v1.11 sample graphics configuration\n)\nPackage graphics Info: Driver file: xetex.def on input line 
107.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics-def/xetex.def\nFile: xetex.def 2022/09/22 v5.0n Graphics/color driver for xetex\n\\stockwidth=\\dimen139\n\\stockheight=\\dimen140\n))\n\\Gin@req@height=\\dimen141\n\\Gin@req@width=\\dimen142\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/xcolor/xcolor.sty\nPackage: xcolor 2022/06/12 v2.14 LaTeX color extensions (UK)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics-cfg/color.cfg\nFile: color.cfg 2016/01/02 v1.6 sample color configuration\n)\nPackage xcolor Info: Driver file: xetex.def on input line 227.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/mathcolor.ltx)\nPackage xcolor Info: Model `cmy' substituted by `cmy0' on input line 1353.\nPackage xcolor Info: Model `RGB' extended on input line 1369.\nPackage xcolor Info: Model `HTML' substituted by `rgb' on input line 1371.\nPackage xcolor Info: Model `Hsb' substituted by `hsb' on input line 1372.\nPackage xcolor Info: Model `tHsb' substituted by `hsb' on input line 1373.\nPackage xcolor Info: Model `HSB' substituted by `hsb' on input line 1374.\nPackage xcolor Info: Model `Gray' substituted by `gray' on input line 1375.\nPackage xcolor Info: Model `wave' substituted by `hsb' on input line 1376.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/preprint/authblk.sty\nPackage: authblk 2001/02/27 1.3 (PWD)\n\\affilsep=\\skip49\n\\@affilsep=\\skip50\n\\c@Maxaffil=\\count190\n\\c@authors=\\count191\n\\c@affil=\\count192\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/etoolbox/etoolbox.sty\nPackage: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)\n\\etb@tempcnta=\\count193\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/titlesec/titlesec.sty\nPackage: titlesec 2021/07/05 v2.14 Sectioning 
titles\n\\ttl@box=\\box51\n\\beforetitleunit=\\skip51\n\\aftertitleunit=\\skip52\n\\ttl@plus=\\dimen143\n\\ttl@minus=\\dimen144\n\\ttl@toksa=\\toks17\n\\titlewidth=\\dimen145\n\\titlewidthlast=\\dimen146\n\\titlewidthfirst=\\dimen147\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tools/calc.sty\nPackage: calc 2017/05/25 v4.3 Infix arithmetic (KKT,FJ)\n\\calc@Acount=\\count194\n\\calc@Bcount=\\count195\n\\calc@Adimen=\\dimen148\n\\calc@Bdimen=\\dimen149\n\\calc@Askip=\\skip53\n\\calc@Bskip=\\skip54\nLaTeX Info: Redefining \\setlength on input line 80.\nLaTeX Info: Redefining \\addtolength on input line 81.\n\\calc@Ccount=\\count196\n\\calc@Cskip=\\skip55\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/frontendlayer/tikz.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgf.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfrcs.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-common.tex\n\\pgfutil@everybye=\\toks18\n\\pgfutil@tempdima=\\dimen150\n\\pgfutil@tempdimb=\\dimen151\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-common-lists.tex)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfutil-latex.def\n\\pgfutil@abb=\\box52\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfrcs.code.tex (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/pgf.revision.tex)\nPackage: pgfrcs 2021/05/15 v3.1.9a (3.1.9a)\n))\nPackage: pgf 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/basiclayer/pgfcore.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/systemlayer/pgfsys.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys.code.tex\nPackage: pgfsys 2021/05/15 v3.1.9a 
(3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex\n\\pgfkeys@pathtoks=\\toks19\n\\pgfkeys@temptoks=\\toks20\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeysfiltered.code.tex\n\\pgfkeys@tmptoks=\\toks21\n))\n\\pgf@x=\\dimen152\n\\pgf@y=\\dimen153\n\\pgf@xa=\\dimen154\n\\pgf@ya=\\dimen155\n\\pgf@xb=\\dimen156\n\\pgf@yb=\\dimen157\n\\pgf@xc=\\dimen158\n\\pgf@yc=\\dimen159\n\\pgf@xd=\\dimen160\n\\pgf@yd=\\dimen161\n\\w@pgf@writea=\\write3\n\\r@pgf@reada=\\read2\n\\c@pgf@counta=\\count197\n\\c@pgf@countb=\\count198\n\\c@pgf@countc=\\count199\n\\c@pgf@countd=\\count266\n\\t@pgf@toka=\\toks22\n\\t@pgf@tokb=\\toks23\n\\t@pgf@tokc=\\toks24\n\\pgf@sys@id@count=\\count267\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgf.cfg\nFile: pgf.cfg 2021/05/15 v3.1.9a (3.1.9a)\n)\nDriver file for pgf: pgfsys-xetex.def\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-xetex.def\nFile: pgfsys-xetex.def 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-dvipdfmx.def\nFile: pgfsys-dvipdfmx.def 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsys-common-pdf.def\nFile: pgfsys-common-pdf.def 2021/05/15 v3.1.9a (3.1.9a)\n)\n\\pgfsys@objnum=\\count268\n))) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsyssoftpath.code.tex\nFile: pgfsyssoftpath.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfsyssoftpath@smallbuffer@items=\\count269\n\\pgfsyssoftpath@bigbuffer@items=\\count270\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/systemlayer/pgfsysprotocol.code.tex\nFile: pgfsysprotocol.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcore.code.tex\nPackage: pgfcore 2021/05/15 v3.1.9a 
(3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathcalc.code.tex (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathutil.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathparser.code.tex\n\\pgfmath@dimen=\\dimen162\n\\pgfmath@count=\\count271\n\\pgfmath@box=\\box53\n\\pgfmath@toks=\\toks25\n\\pgfmath@stack@operand=\\toks26\n\\pgfmath@stack@operation=\\toks27\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.code.tex (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.basic.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.trigonometric.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.random.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.comparison.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.base.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.round.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.misc.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfunctions.integerarithmetics.code.tex))) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmathfloat.code.tex\n\\c@pgfmathroundto@lastzeros=\\count272\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfint.code.tex) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepoints.code.tex\nFile: pgfcorepoints.code.tex 2021/05/15 v3.1.9a 
(3.1.9a)\n\\pgf@picminx=\\dimen163\n\\pgf@picmaxx=\\dimen164\n\\pgf@picminy=\\dimen165\n\\pgf@picmaxy=\\dimen166\n\\pgf@pathminx=\\dimen167\n\\pgf@pathmaxx=\\dimen168\n\\pgf@pathminy=\\dimen169\n\\pgf@pathmaxy=\\dimen170\n\\pgf@xx=\\dimen171\n\\pgf@xy=\\dimen172\n\\pgf@yx=\\dimen173\n\\pgf@yy=\\dimen174\n\\pgf@zx=\\dimen175\n\\pgf@zy=\\dimen176\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathconstruct.code.tex\nFile: pgfcorepathconstruct.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@path@lastx=\\dimen177\n\\pgf@path@lasty=\\dimen178\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathusage.code.tex\nFile: pgfcorepathusage.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@shorten@end@additional=\\dimen179\n\\pgf@shorten@start@additional=\\dimen180\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorescopes.code.tex\nFile: pgfcorescopes.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfpic=\\box54\n\\pgf@hbox=\\box55\n\\pgf@layerbox@main=\\box56\n\\pgf@picture@serial@count=\\count273\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoregraphicstate.code.tex\nFile: pgfcoregraphicstate.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgflinewidth=\\dimen181\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransformations.code.tex\nFile: pgfcoretransformations.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@pt@x=\\dimen182\n\\pgf@pt@y=\\dimen183\n\\pgf@pt@temp=\\dimen184\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorequick.code.tex\nFile: pgfcorequick.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreobjects.code.tex\nFile: pgfcoreobjects.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepathprocessing.code.tex\nFile: 
pgfcorepathprocessing.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorearrows.code.tex\nFile: pgfcorearrows.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfarrowsep=\\dimen185\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreshade.code.tex\nFile: pgfcoreshade.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@max=\\dimen186\n\\pgf@sys@shading@range@num=\\count274\n\\pgf@shadingcount=\\count275\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreimage.code.tex\nFile: pgfcoreimage.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoreexternal.code.tex\nFile: pgfcoreexternal.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfexternal@startupbox=\\box57\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorelayers.code.tex\nFile: pgfcorelayers.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcoretransparency.code.tex\nFile: pgfcoretransparency.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorepatterns.code.tex\nFile: pgfcorepatterns.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/basiclayer/pgfcorerdf.code.tex\nFile: pgfcorerdf.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n))) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleshapes.code.tex\nFile: pgfmoduleshapes.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfnodeparttextbox=\\box58\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmoduleplot.code.tex\nFile: pgfmoduleplot.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-0-65.sty\nPackage: pgfcomp-version-0-65 
2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@nodesepstart=\\dimen187\n\\pgf@nodesepend=\\dimen188\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/compatibility/pgfcomp-version-1-18.sty\nPackage: pgfcomp-version-1-18 2021/05/15 v3.1.9a (3.1.9a)\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgffor.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/utilities/pgfkeys.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgfkeys.code.tex)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pgf/math/pgfmath.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/utilities/pgffor.code.tex\nPackage: pgffor 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/math/pgfmath.code.tex)\n\\pgffor@iter=\\dimen189\n\\pgffor@skip=\\dimen190\n\\pgffor@stack=\\toks28\n\\pgffor@toks=\\toks29\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/tikz.code.tex\nPackage: tikz 2021/05/15 v3.1.9a (3.1.9a)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/libraries/pgflibraryplothandlers.code.tex\nFile: pgflibraryplothandlers.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgf@plot@mark@count=\\count276\n\\pgfplotmarksize=\\dimen191\n)\n\\tikz@lastx=\\dimen192\n\\tikz@lasty=\\dimen193\n\\tikz@lastxsaved=\\dimen194\n\\tikz@lastysaved=\\dimen195\n\\tikz@lastmovetox=\\dimen196\n\\tikz@lastmovetoy=\\dimen197\n\\tikzleveldistance=\\dimen198\n\\tikzsiblingdistance=\\dimen199\n\\tikz@figbox=\\box59\n\\tikz@figbox@bg=\\box60\n\\tikz@tempbox=\\box61\n\\tikz@tempbox@bg=\\box62\n\\tikztreelevel=\\count277\n\\tikznumberofchildren=\\count278\n\\tikznumberofcurrentchild=\\count279\n\\tikz@fig@count=\\count280\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/modules/pgfmodulematrix.code.tex\nFile: 
pgfmodulematrix.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n\\pgfmatrixcurrentrow=\\count281\n\\pgfmatrixcurrentcolumn=\\count282\n\\pgf@matrix@numberofcolumns=\\count283\n)\n\\tikz@expandcount=\\count284\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pgf/frontendlayer/tikz/libraries/tikzlibrarytopaths.code.tex\nFile: tikzlibrarytopaths.code.tex 2021/05/15 v3.1.9a (3.1.9a)\n))) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hyperref.sty\nPackage: hyperref 2022-09-22 v7.00t Hypertext links for LaTeX\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty\nPackage: ltxcmds 2020-05-10 v1.25 LaTeX kernel commands for general use (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/iftex/iftex.sty\nPackage: iftex 2022/02/03 v1.0f TeX engine tests\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pdftexcmds/pdftexcmds.sty\nPackage: pdftexcmds 2020-06-27 v0.33 Utility functions of pdfTeX for LuaTeX (HO)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/infwarerr/infwarerr.sty\nPackage: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)\n)\nPackage pdftexcmds Info: \\pdf@primitive is available.\nPackage pdftexcmds Info: \\pdf@ifprimitive is available.\nPackage pdftexcmds Info: \\pdfdraftmode not found.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty\nPackage: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty\nPackage: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/pdfescape/pdfescape.sty\nPackage: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hycolor/hycolor.sty\nPackage: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)\n) 
(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty\nPackage: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/auxhook/auxhook.sty\nPackage: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hyperref/nameref.sty\nPackage: nameref 2022-05-17 v2.50 Cross-referencing by name of section\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/refcount/refcount.sty\nPackage: refcount 2019/12/15 v3.6 Data extraction from label references (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty\nPackage: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/kvoptions/kvoptions.sty\nPackage: kvoptions 2022-06-15 v3.15 Key value format for package options (HO)\n))\n\\c@section@level=\\count285\n)\n\\@linkdim=\\dimen256\n\\Hy@linkcounter=\\count286\n\\Hy@pagecounter=\\count287\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hyperref/pd1enc.def\nFile: pd1enc.def 2022-09-22 v7.00t Hyperref: PDFDocEncoding definition (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/intcalc/intcalc.sty\nPackage: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/etexcmds/etexcmds.sty\nPackage: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)\n)\n\\Hy@SavedSpaceFactor=\\count288\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hyperref/puenc.def\nFile: puenc.def 2022-09-22 v7.00t Hyperref: PDF Unicode definition (HO)\n)\nPackage hyperref Info: Hyper figures OFF on input line 4162.\nPackage hyperref Info: Link nesting OFF on input line 4167.\nPackage hyperref Info: Hyper index ON on input line 4170.\nPackage hyperref Info: Plain pages OFF on input line 
4177.\nPackage hyperref Info: Backreferencing OFF on input line 4182.\nPackage hyperref Info: Implicit mode ON; LaTeX internals redefined.\nPackage hyperref Info: Bookmarks ON on input line 4410.\n\\c@Hy@tempcnt=\\count289\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/url/url.sty\n\\Urlmuskip=\\muskip16\nPackage: url 2013/09/16  ver 3.4  Verb mode for urls, etc.\n)\nLaTeX Info: Redefining \\url on input line 4748.\n\\XeTeXLinkMargin=\\dimen257\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/bitset/bitset.sty\nPackage: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty\nPackage: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO)\n))\n\\Fld@menulength=\\count290\n\\Field@Width=\\dimen258\n\\Fld@charsize=\\dimen259\nPackage hyperref Info: Hyper figures OFF on input line 6027.\nPackage hyperref Info: Link nesting OFF on input line 6032.\nPackage hyperref Info: Hyper index ON on input line 6035.\nPackage hyperref Info: backreferencing OFF on input line 6042.\nPackage hyperref Info: Link coloring OFF on input line 6047.\nPackage hyperref Info: Link coloring with OCG OFF on input line 6052.\nPackage hyperref Info: PDF/A mode OFF on input line 6057.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/atbegshi-ltx.sty\nPackage: atbegshi-ltx 2021/01/10 v1.0c Emulation of the original atbegshi\npackage with kernel methods\n)\n\\Hy@abspage=\\count291\n\\c@Item=\\count292\n\\c@Hfootnote=\\count293\n)\nPackage hyperref Info: Driver (autodetected): hxetex.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/hyperref/hxetex.def\nFile: hxetex.def 2022-09-22 v7.00t Hyperref driver for XeTeX\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/stringenc/stringenc.sty\nPackage: stringenc 2019/11/29 v1.12 Convert strings between diff. 
encodings (HO)\n)\n\\pdfm@box=\\box63\n\\c@Hy@AnnotLevel=\\count294\n\\HyField@AnnotCount=\\count295\n\\Fld@listcount=\\count296\n\\c@bookmark@seq@number=\\count297\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty\nPackage: rerunfilecheck 2022-07-10 v1.10 Rerun checks for auxiliary files (HO)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/atveryend-ltx.sty\nPackage: atveryend-ltx 2020/08/19 v1.0a Emulation of the original atveryend package\nwith kernel methods\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty\nPackage: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)\n)\nPackage uniquecounter Info: New unique counter `rerunfilecheck' on input line 285.\n)\n\\Hy@SectionHShift=\\skip56\n)\nPackage hyperref Info: Option `colorlinks' set `true' on input line 12.\nPackage hyperref Info: Option `breaklinks' set `true' on input line 12.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/caption/caption.sty\nPackage: caption 2022/03/01 v3.6b Customizing captions (AR)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/caption/caption3.sty\nPackage: caption3 2022/03/17 v2.3b caption3 kernel (AR)\n\\caption@tempdima=\\dimen260\n\\captionmargin=\\dimen261\n\\caption@leftmargin=\\dimen262\n\\caption@rightmargin=\\dimen263\n\\caption@width=\\dimen264\n\\caption@indent=\\dimen265\n\\caption@parindent=\\dimen266\n\\caption@hangindent=\\dimen267\nPackage caption Info: Standard document class detected.\n)\n\\c@caption@flags=\\count298\n\\c@continuedfloat=\\count299\nPackage caption Info: hyperref package is loaded.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tcolorbox/tcolorbox.sty\nPackage: tcolorbox 2022/06/24 version 5.1.1 text color boxes\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tools/verbatim.sty\nPackage: verbatim 2020-07-07 v1.5u LaTeX2e package for verbatim 
enhancements\n\\every@verbatim=\\toks30\n\\verbatim@line=\\toks31\n\\verbatim@in@stream=\\read3\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/environ/environ.sty\nPackage: environ 2014/05/04 v0.3 A new way to define environments\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/trimspaces/trimspaces.sty\nPackage: trimspaces 2009/09/17 v1.1 Trim spaces around a token list\n)\n\\@envbody=\\toks32\n)\n\\tcb@titlebox=\\box64\n\\tcb@upperbox=\\box65\n\\tcb@lowerbox=\\box66\n\\tcb@phantombox=\\box67\n\\c@tcbbreakpart=\\count300\n\\c@tcblayer=\\count301\n\\c@tcolorbox@number=\\count302\n\\tcb@temp=\\box68\n\\tcb@temp=\\box69\n\\tcb@temp=\\box70\n\\tcb@temp=\\box71\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amssymb.sty\nPackage: amssymb 2013/01/14 v3.01 AMS font symbols\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/amsfonts.sty\nPackage: amsfonts 2013/01/14 v3.01 Basic AMSFonts support\n\\@emptytoks=\\toks33\n\\symAMSa=\\mathgroup4\n\\symAMSb=\\mathgroup5\nLaTeX Font Info:    Redeclaring math symbol \\hbar on input line 98.\nLaTeX Font Info:    Overwriting math alphabet `\\mathfrak' in version `bold'\n(Font)                  U/euf/m/n --> U/euf/b/n on input line 106.\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsmath.sty\nPackage: amsmath 2022/04/08 v2.17n AMS math features\n\\@mathmargin=\\skip57\nFor additional information on amsmath, use the `?' 
option.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amstext.sty\nPackage: amstext 2021/08/26 v2.01 AMS text\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsgen.sty\nFile: amsgen.sty 1999/11/30 v2.0 generic functions\n\\@emptytoks=\\toks34\n\\ex@=\\dimen268\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsbsy.sty\nPackage: amsbsy 1999/11/29 v1.2d Bold Symbols\n\\pmbraise@=\\dimen269\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsmath/amsopn.sty\nPackage: amsopn 2022/04/08 v2.04 operator names\n)\n\\inf@bad=\\count303\nLaTeX Info: Redefining \\frac on input line 234.\n\\uproot@=\\count304\n\\leftroot@=\\count305\nLaTeX Info: Redefining \\overline on input line 399.\nLaTeX Info: Redefining \\colon on input line 410.\n\\classnum@=\\count306\n\\DOTSCASE@=\\count307\nLaTeX Info: Redefining \\ldots on input line 496.\nLaTeX Info: Redefining \\dots on input line 499.\nLaTeX Info: Redefining \\cdots on input line 620.\n\\Mathstrutbox@=\\box72\n\\strutbox@=\\box73\nLaTeX Info: Redefining \\big on input line 722.\nLaTeX Info: Redefining \\Big on input line 723.\nLaTeX Info: Redefining \\bigg on input line 724.\nLaTeX Info: Redefining \\Bigg on input line 725.\n\\big@size=\\dimen270\nLaTeX Font Info:    Redeclaring font encoding OML on input line 743.\nLaTeX Font Info:    Redeclaring font encoding OMS on input line 744.\n\\macc@depth=\\count308\nLaTeX Info: Redefining \\bmod on input line 905.\nLaTeX Info: Redefining \\pmod on input line 910.\nLaTeX Info: Redefining \\smash on input line 940.\nLaTeX Info: Redefining \\relbar on input line 970.\nLaTeX Info: Redefining \\Relbar on input line 
971.\n\\c@MaxMatrixCols=\\count309\n\\dotsspace@=\\muskip17\n\\c@parentequation=\\count310\n\\dspbrk@lvl=\\count311\n\\tag@help=\\toks35\n\\row@=\\count312\n\\column@=\\count313\n\\maxfields@=\\count314\n\\andhelp@=\\toks36\n\\eqnshift@=\\dimen271\n\\alignsep@=\\dimen272\n\\tagshift@=\\dimen273\n\\tagwidth@=\\dimen274\n\\totwidth@=\\dimen275\n\\lineht@=\\dimen276\n\\@envbody=\\toks37\n\\multlinegap=\\skip58\n\\multlinetaggap=\\skip59\n\\mathdisplay@stack=\\toks38\nLaTeX Info: Redefining \\[ on input line 2953.\nLaTeX Info: Redefining \\] on input line 2954.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/iftex/ifxetex.sty\nPackage: ifxetex 2019/10/25 v0.7 ifxetex legacy package. Use iftex instead.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/iftex/ifluatex.sty\nPackage: ifluatex 2019/10/25 v1.5 ifluatex legacy package. Use iftex instead.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/seqsplit/seqsplit.sty\nPackage: seqsplit 2006/08/07 v0.1 Splitting long sequences (DNA, RNA, proteins, etc.) 
\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/fixltx2e.sty\nPackage: fixltx2e 2016/12/29 v2.1a fixes to LaTeX (obsolete)\nApplying: [2015/01/01] Old fixltx2e package on input line 46.\n\nPackage fixltx2e Warning: fixltx2e is not required with releases after 2015\n(fixltx2e)                All fixes are now in the LaTeX kernel.\n(fixltx2e)                See the latexrelease package for details.\n\nAlready applied: [0000/00/00] Old fixltx2e package on input line 53.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/biblatex.sty\nPackage: biblatex 2022/07/12 v3.18b programmable bibliographies (PK/MW)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/logreq/logreq.sty\nPackage: logreq 2010/08/04 v1.0 xml request logger\n\\lrq@indent=\\count315\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/logreq/logreq.def\nFile: logreq.def 2010/08/04 v1.0 logreq spec v1.0\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/ifthen.sty\nPackage: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package 
(DPC)\n)\n\\c@tabx@nest=\\count316\n\\c@listtotal=\\count317\n\\c@listcount=\\count318\n\\c@liststart=\\count319\n\\c@liststop=\\count320\n\\c@citecount=\\count321\n\\c@citetotal=\\count322\n\\c@multicitecount=\\count323\n\\c@multicitetotal=\\count324\n\\c@instcount=\\count325\n\\c@maxnames=\\count326\n\\c@minnames=\\count327\n\\c@maxitems=\\count328\n\\c@minitems=\\count329\n\\c@citecounter=\\count330\n\\c@maxcitecounter=\\count331\n\\c@savedcitecounter=\\count332\n\\c@uniquelist=\\count333\n\\c@uniquename=\\count334\n\\c@refsection=\\count335\n\\c@refsegment=\\count336\n\\c@maxextratitle=\\count337\n\\c@maxextratitleyear=\\count338\n\\c@maxextraname=\\count339\n\\c@maxextradate=\\count340\n\\c@maxextraalpha=\\count341\n\\c@abbrvpenalty=\\count342\n\\c@highnamepenalty=\\count343\n\\c@lownamepenalty=\\count344\n\\c@maxparens=\\count345\n\\c@parenlevel=\\count346\n\\blx@tempcnta=\\count347\n\\blx@tempcntb=\\count348\n\\blx@tempcntc=\\count349\n\\c@blx@maxsection=\\count350\n\\blx@maxsegment@0=\\count351\n\\blx@notetype=\\count352\n\\blx@parenlevel@text=\\count353\n\\blx@parenlevel@foot=\\count354\n\\blx@sectionciteorder@0=\\count355\n\\blx@sectionciteorderinternal@0=\\count356\n\\blx@entrysetcounter=\\count357\n\\blx@biblioinstance=\\count358\n\\labelnumberwidth=\\skip60\n\\labelalphawidth=\\skip61\n\\biblabelsep=\\skip62\n\\bibitemsep=\\skip63\n\\bibnamesep=\\skip64\n\\bibinitsep=\\skip65\n\\bibparsep=\\skip66\n\\bibhang=\\skip67\n\\blx@bcfin=\\read4\n\\blx@bcfout=\\write4\n\\blx@langwohyphens=\\language3\n\\c@mincomprange=\\count359\n\\c@maxcomprange=\\count360\n\\c@mincompwidth=\\count361\nPackage biblatex Info: Trying to load biblatex default data model...\nPackage biblatex Info: ... file 'blx-dm.def' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/blx-dm.def\nFile: blx-dm.def 2022/07/12 v3.18b biblatex localization (PK/MW)\n)\nPackage biblatex Info: Trying to load biblatex custom data model...\nPackage biblatex Info: ... 
file 'biblatex-dm.cfg' not found.\n\\c@afterword=\\count362\n\\c@savedafterword=\\count363\n\\c@annotator=\\count364\n\\c@savedannotator=\\count365\n\\c@author=\\count366\n\\c@savedauthor=\\count367\n\\c@bookauthor=\\count368\n\\c@savedbookauthor=\\count369\n\\c@commentator=\\count370\n\\c@savedcommentator=\\count371\n\\c@editor=\\count372\n\\c@savededitor=\\count373\n\\c@editora=\\count374\n\\c@savededitora=\\count375\n\\c@editorb=\\count376\n\\c@savededitorb=\\count377\n\\c@editorc=\\count378\n\\c@savededitorc=\\count379\n\\c@foreword=\\count380\n\\c@savedforeword=\\count381\n\\c@holder=\\count382\n\\c@savedholder=\\count383\n\\c@introduction=\\count384\n\\c@savedintroduction=\\count385\n\\c@namea=\\count386\n\\c@savednamea=\\count387\n\\c@nameb=\\count388\n\\c@savednameb=\\count389\n\\c@namec=\\count390\n\\c@savednamec=\\count391\n\\c@translator=\\count392\n\\c@savedtranslator=\\count393\n\\c@shortauthor=\\count394\n\\c@savedshortauthor=\\count395\n\\c@shorteditor=\\count396\n\\c@savedshorteditor=\\count397\n\\c@labelname=\\count398\n\\c@savedlabelname=\\count399\n\\c@institution=\\count400\n\\c@savedinstitution=\\count401\n\\c@lista=\\count402\n\\c@savedlista=\\count403\n\\c@listb=\\count404\n\\c@savedlistb=\\count405\n\\c@listc=\\count406\n\\c@savedlistc=\\count407\n\\c@listd=\\count408\n\\c@savedlistd=\\count409\n\\c@liste=\\count410\n\\c@savedliste=\\count411\n\\c@listf=\\count412\n\\c@savedlistf=\\count413\n\\c@location=\\count414\n\\c@savedlocation=\\count415\n\\c@organization=\\count416\n\\c@savedorganization=\\count417\n\\c@origlocation=\\count418\n\\c@savedoriglocation=\\count419\n\\c@origpublisher=\\count420\n\\c@savedorigpublisher=\\count421\n\\c@publisher=\\count422\n\\c@savedpublisher=\\count423\n\\c@language=\\count424\n\\c@savedlanguage=\\count425\n\\c@origlanguage=\\count426\n\\c@savedoriglanguage=\\count427\n\\c@pageref=\\count428\n\\c@savedpageref=\\count429\n\\shorthandwidth=\\skip68\n\\shortjournalwidth=\\skip69\n\\shortserieswidth=\\skip70\n\
\shorttitlewidth=\\skip71\n\\shortauthorwidth=\\skip72\n\\shorteditorwidth=\\skip73\n\\locallabelnumberwidth=\\skip74\n\\locallabelalphawidth=\\skip75\n\\localshorthandwidth=\\skip76\n\\localshortjournalwidth=\\skip77\n\\localshortserieswidth=\\skip78\n\\localshorttitlewidth=\\skip79\n\\localshortauthorwidth=\\skip80\n\\localshorteditorwidth=\\skip81\nPackage biblatex Info: Trying to load enhanced support for Unicode engines...\nPackage biblatex Info: ... file 'blx-unicode.def' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/blx-unicode.def)\nPackage biblatex Info: Trying to load compatibility code...\nPackage biblatex Info: ... file 'blx-compat.def' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/blx-compat.def\nFile: blx-compat.def 2022/07/12 v3.18b biblatex compatibility (PK/MW)\n)\nPackage biblatex Info: Trying to load generic definitions...\nPackage biblatex Info: ... file 'biblatex.def' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/biblatex.def\nFile: biblatex.def 2022/07/12 v3.18b biblatex compatibility (PK/MW)\n\\c@textcitecount=\\count430\n\\c@textcitetotal=\\count431\n\\c@textcitemaxnames=\\count432\n\\c@biburlbigbreakpenalty=\\count433\n\\c@biburlbreakpenalty=\\count434\n\\c@biburlnumpenalty=\\count435\n\\c@biburlucpenalty=\\count436\n\\c@biburllcpenalty=\\count437\n\\biburlbigskip=\\muskip18\n\\biburlnumskip=\\muskip19\n\\biburlucskip=\\muskip20\n\\biburllcskip=\\muskip21\n\\c@smartand=\\count438\n)\nPackage biblatex Info: Trying to load bibliography style 'numeric'...\nPackage biblatex Info: ... file 'numeric.bbx' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/bbx/numeric.bbx\nFile: numeric.bbx 2022/07/12 v3.18b biblatex bibliography style (PK/MW)\nPackage biblatex Info: Trying to load bibliography style 'standard'...\nPackage biblatex Info: ... 
file 'standard.bbx' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/bbx/standard.bbx\nFile: standard.bbx 2022/07/12 v3.18b biblatex bibliography style (PK/MW)\n\\c@bbx:relatedcount=\\count439\n\\c@bbx:relatedtotal=\\count440\n))\nPackage biblatex Info: Trying to load citation style 'numeric'...\nPackage biblatex Info: ... file 'numeric.cbx' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/cbx/numeric.cbx\nFile: numeric.cbx 2022/07/12 v3.18b biblatex citation style (PK/MW)\nPackage biblatex Info: Redefining '\\cite'.\nPackage biblatex Info: Redefining '\\parencite'.\nPackage biblatex Info: Redefining '\\footcite'.\nPackage biblatex Info: Redefining '\\footcitetext'.\nPackage biblatex Info: Redefining '\\smartcite'.\nPackage biblatex Info: Redefining '\\supercite'.\nPackage biblatex Info: Redefining '\\textcite'.\nPackage biblatex Info: Redefining '\\textcites'.\nPackage biblatex Info: Redefining '\\cites'.\nPackage biblatex Info: Redefining '\\parencites'.\nPackage biblatex Info: Redefining '\\smartcites'.\n)\nPackage biblatex Info: Trying to load configuration file...\nPackage biblatex Info: ... file 'biblatex.cfg' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/biblatex.cfg\nFile: biblatex.cfg \n)\nPackage biblatex Info: XeTeX detected.\n(biblatex)             Assuming input encoding 'utf8'.\nPackage biblatex Info: Document encoding is UTF8 ....\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/l3kernel/expl3.sty\nPackage: expl3 2022-08-30 L3 programming layer (loader) \n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/l3backend/l3backend-xetex.def\nFile: l3backend-xetex.def 2022-08-30 L3 backend support: XeTeX\n\\g__graphics_track_int=\\count441\n\\l__pdf_internal_box=\\box74\n\\g__pdf_backend_object_int=\\count442\n\\g__pdf_backend_annotation_int=\\count443\n\\g__pdf_backend_link_int=\\count444\n))\nPackage biblatex Info: ... 
and expl3\n(biblatex)             2022-08-30 L3 programming layer (loader) \n(biblatex)             is new enough (at least 2020/04/06),\n(biblatex)             setting 'casechanger=expl3'.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/blx-case-expl3.sty (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/l3packages/xparse/xparse.sty\nPackage: xparse 2022-06-22 L3 Experimental document command parser\n)\nPackage: blx-case-expl3 2022/07/12 v3.18b expl3 case changing code for biblatex\n)) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/geometry/geometry.sty\nPackage: geometry 2020/01/02 v5.9 Page Geometry\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/iftex/ifvtex.sty\nPackage: ifvtex 2019/10/25 v1.7 ifvtex legacy package. Use iftex instead.\n)\n\\Gm@cnth=\\count445\n\\Gm@cntv=\\count446\n\\c@Gm@tempcnt=\\count447\n\\Gm@bindingoffset=\\dimen277\n\\Gm@wd@mp=\\dimen278\n\\Gm@odd@mp=\\dimen279\n\\Gm@even@mp=\\dimen280\n\\Gm@layoutwidth=\\dimen281\n\\Gm@layoutheight=\\dimen282\n\\Gm@layouthoffset=\\dimen283\n\\Gm@layoutvoffset=\\dimen284\n\\Gm@dimlist=\\toks39\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/fancyhdr/fancyhdr.sty\nPackage: fancyhdr 2022/05/18 v4.0.3 Extensive control of page headers and footers\n\\f@nch@headwidth=\\skip82\n\\f@nch@O@elh=\\skip83\n\\f@nch@O@erh=\\skip84\n\\f@nch@O@olh=\\skip85\n\\f@nch@O@orh=\\skip86\n\\f@nch@O@elf=\\skip87\n\\f@nch@O@erf=\\skip88\n\\f@nch@O@olf=\\skip89\n\\f@nch@O@orf=\\skip90\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/xelatex/mathspec/mathspec.sty\nPackage: mathspec 2016/12/22 v0.2b LaTeX Package (Mathematics font selection for XeLaTeX)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.sty\nPackage: fontspec 2022/01/15 v2.8a Font selection for XeLaTeX and LuaLaTeX\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty\nPackage: fontspec-xetex 2022/01/15 v2.8a Font 
selection for XeLaTeX and LuaLaTeX\n\\l__fontspec_script_int=\\count448\n\\l__fontspec_language_int=\\count449\n\\l__fontspec_strnum_int=\\count450\n\\l__fontspec_tmp_int=\\count451\n\\l__fontspec_tmpa_int=\\count452\n\\l__fontspec_tmpb_int=\\count453\n\\l__fontspec_tmpc_int=\\count454\n\\l__fontspec_em_int=\\count455\n\\l__fontspec_emdef_int=\\count456\n\\l__fontspec_strong_int=\\count457\n\\l__fontspec_strongdef_int=\\count458\n\\l__fontspec_tmpa_dim=\\dimen285\n\\l__fontspec_tmpb_dim=\\dimen286\n\\l__fontspec_tmpc_dim=\\dimen287\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/fontenc.sty\nPackage: fontenc 2021/04/29 v2.0v Standard LaTeX package\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/fontspec/fontspec.cfg))) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/xkeyval/xkeyval.sty\nPackage: xkeyval 2022/06/16 v2.9 package option processing (HA)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/xkeyval/xkeyval.tex (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/xkeyval/xkvutils.tex\n\\XKV@toks=\\toks40\n\\XKV@tempa@toks=\\toks41\n)\n\\XKV@depth=\\count459\nFile: xkeyval.tex 2014/12/03 v2.7a key=value parser (HA)\n))\n\\c@eu@=\\count460\n\\c@eu@i=\\count461\n\\c@mkern=\\count462\n)\nPackage hyperref Info: Option `unicode' set `true' on input line 151.\nPackage hyperref Info: Option `breaklinks' set `true' on input line 151.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/grffile/grffile.sty\nPackage: grffile 2019/11/11 v2.1 Extended file name support for graphics (legacy)\nPackage grffile Info: This package is an empty stub for compatibility on input line 40.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/fancyvrb/fancyvrb.sty\nPackage: fancyvrb 2022/06/06 4.5 verbatim text (tvz,hv)\n\\FV@CodeLineNo=\\count463\n\\FV@InFile=\\read5\n\\FV@TabBox=\\box75\n\\c@FancyVerbLine=\\count464\n\\FV@StepNumber=\\count465\n\\FV@OutFile=\\write5\n) 
(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/framed/framed.sty\nPackage: framed 2011/10/22 v 0.96: framed or shaded text with page breaks\n\\OuterFrameSep=\\skip91\n\\fb@frw=\\dimen288\n\\fb@frh=\\dimen289\n\\FrameRule=\\dimen290\n\\FrameSep=\\dimen291\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tools/longtable.sty\nPackage: longtable 2021-09-01 v4.17 Multi-page Table package (DPC)\n\\LTleft=\\skip92\n\\LTright=\\skip93\n\\LTpre=\\skip94\n\\LTpost=\\skip95\n\\LTchunksize=\\count466\n\\LTcapwidth=\\dimen292\n\\LT@head=\\box76\n\\LT@firsthead=\\box77\n\\LT@foot=\\box78\n\\LT@lastfoot=\\box79\n\\LT@gbox=\\box80\n\\LT@cols=\\count467\n\\LT@rows=\\count468\n\\c@LT@tables=\\count469\n\\c@LT@chunks=\\count470\n\\LT@p@ftn=\\toks42\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/booktabs/booktabs.sty\nPackage: booktabs 2020/01/12 v1.61803398 Publication quality tables\n\\heavyrulewidth=\\dimen293\n\\lightrulewidth=\\dimen294\n\\cmidrulewidth=\\dimen295\n\\belowrulesep=\\dimen296\n\\belowbottomsep=\\dimen297\n\\aboverulesep=\\dimen298\n\\abovetopsep=\\dimen299\n\\cmidrulesep=\\dimen300\n\\cmidrulekern=\\dimen301\n\\defaultaddspace=\\dimen302\n\\@cmidla=\\count471\n\\@cmidlb=\\count472\n\\@aboverulesep=\\dimen303\n\\@belowrulesep=\\dimen304\n\\@thisruleclass=\\count473\n\\@lastruleclass=\\count474\n\\@thisrulewidth=\\dimen305\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tools/array.sty\nPackage: array 2022/03/10 v2.5f Tabular extension package (FMi)\n\\col@sep=\\dimen306\n\\ar@mcellbox=\\box81\n\\extrarowheight=\\dimen307\n\\NC@list=\\toks43\n\\extratabsurround=\\skip96\n\\backup@length=\\skip97\n\\ar@cellbox=\\box82\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/mdwtools/footnote.sty\nPackage: footnote 1997/01/28 1.13 Save footnotes around 
boxes\n\\fn@notes=\\box83\n\\fn@width=\\dimen308\n)\n\\cslhangindent=\\skip98\n\\csllabelwidth=\\skip99\n\\cslentryspacingunit=\\skip100\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/multirow/multirow.sty\nPackage: multirow 2021/03/15 v2.8 Span multiple rows of a table\n\\multirow@colwidth=\\skip101\n\\multirow@cntb=\\count475\n\\multirow@dima=\\skip102\n\\bigstrutjot=\\dimen309\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/wrapfig/wrapfig.sty\n\\wrapoverhang=\\dimen310\n\\WF@size=\\dimen311\n\\c@WF@wrappedlines=\\count476\n\\WF@box=\\box84\n\\WF@everypar=\\toks44\nPackage: wrapfig 2003/01/31  v 3.6\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/float/float.sty\nPackage: float 2001/11/08 v1.3d Float enhancements (AL)\n\\c@float@type=\\count477\n\\float@exts=\\toks45\n\\float@box=\\box85\n\\@float@everytoks=\\toks46\n\\@floatcapt=\\box86\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/colortbl/colortbl.sty\nPackage: colortbl 2022/06/20 v1.0f Color table columns (DPC)\n\\everycr=\\toks47\n\\minrowclearance=\\skip103\n\\rownum=\\count478\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/pdflscape/pdflscape.sty\nPackage: pdflscape 2019/12/05 v0.12 Display of landscape pages in PDF (HO)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/graphics/lscape.sty\nPackage: lscape 2020/05/28 v3.02 Landscape Pages (DPC)\n)\nPackage pdflscape Info: Auto-detected driver: dvipdfm (xetex) on input line 98.\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/tabu/tabu.sty\nPackage: tabu 2019/01/11 v2.9 - flexible LaTeX tabulars (FC+tabu-fixed)\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/varwidth/varwidth.sty\nPackage: varwidth 2009/03/30 ver 0.92;  Variable-width 
minipages\n\\@vwid@box=\\box87\n\\sift@deathcycles=\\count479\n\\@vwid@loff=\\dimen312\n\\@vwid@roff=\\dimen313\n)\n\\c@taburow=\\count480\n\\tabu@nbcols=\\count481\n\\tabu@cnt=\\count482\n\\tabu@Xcol=\\count483\n\\tabu@alloc=\\count484\n\\tabu@nested=\\count485\n\\tabu@target=\\dimen314\n\\tabu@spreadtarget=\\dimen315\n\\tabu@naturalX=\\dimen316\n\\tabucolX=\\dimen317\n\\tabu@Xsum=\\dimen318\n\\extrarowdepth=\\dimen319\n\\abovetabulinesep=\\dimen320\n\\belowtabulinesep=\\dimen321\n\\tabustrutrule=\\dimen322\n\\tabu@thebody=\\toks48\n\\tabu@footnotes=\\toks49\n\\tabu@box=\\box88\n\\tabu@arstrutbox=\\box89\n\\tabu@hleads=\\box90\n\\tabu@vleads=\\box91\n\\tabu@cellskip=\\skip104\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/threeparttable/threeparttable.sty\nPackage: threeparttable 2003/06/13  v 3.0\n\\@tempboxb=\\box92\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/threeparttablex/threeparttablex.sty\nPackage: threeparttablex 2013/07/23 v0.3 by daleif\n\\TPTL@width=\\skip105\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/generic/ulem/ulem.sty\n\\UL@box=\\box93\n\\UL@hyphenbox=\\box94\n\\UL@skip=\\skip106\n\\UL@hook=\\toks50\n\\UL@height=\\dimen323\n\\UL@pe=\\count486\n\\UL@pixel=\\dimen324\n\\ULC@box=\\box95\nPackage: ulem 2019/11/18\n\\ULdepth=\\dimen325\n) (/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/makecell/makecell.sty\nPackage: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells\n\\rotheadsize=\\dimen326\n\\c@nlinenum=\\count487\n\\TeXr@lab=\\toks51\n)\n\\@quotelevel=\\count488\n\\@quotereset=\\count489\n(./paper.aux)\n\\openout1 = `paper.aux'.\n\nLaTeX Font Info:    Checking defaults for OML/cmm/m/it on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for OMS/cmsy/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for OT1/cmr/m/n on input line 305.\nLaTeX Font Info:    ... 
okay on input line 305.\nLaTeX Font Info:    Checking defaults for T1/cmr/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for TS1/cmr/m/n on input line 305.\nLaTeX Font Info:    Trying to load font information for TS1+cmr on input line 305.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/base/ts1cmr.fd\nFile: ts1cmr.fd 2019/12/16 v2.5j Standard LaTeX font definitions\n)\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for TU/lmr/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for OMX/cmex/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for U/cmr/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for PD1/pdf/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nLaTeX Font Info:    Checking defaults for PU/pdf/m/n on input line 305.\nLaTeX Font Info:    ... okay on input line 305.\nPackage hyperref Info: Link coloring ON on input line 305.\n(./paper.out) (./paper.out)\n\\@outlinefile=\\write6\n\\openout6 = `paper.out'.\n\nPackage caption Info: Begin \\AtBeginDocument code.\nPackage caption Info: float package is loaded.\nPackage caption Info: longtable package is loaded.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/caption/ltcaption.sty\nPackage: ltcaption 2021/01/08 v1.4c longtable captions (AR)\n)\nPackage caption Info: threeparttable package is loaded.\nPackage caption Info: wrapfig package is loaded.\nPackage caption Info: End \\AtBeginDocument code.\nPackage biblatex Info: Trying to load language 'english'...\nPackage biblatex Info: ... 
file 'english.lbx' found.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/biblatex/lbx/english.lbx\nFile: english.lbx 2022/07/12 v3.18b biblatex localization (PK/MW)\n)\nPackage biblatex Info: XeTeX detected.\n(biblatex)             Assuming input encoding 'utf8'.\nPackage biblatex Info: Automatic encoding selection.\n(biblatex)             Assuming data encoding 'utf8'.\n\\openout4 = `paper.bcf'.\n\nPackage biblatex Info: Trying to load bibliographic data...\nPackage biblatex Info: ... file 'paper.bbl' not found.\nNo file paper.bbl.\nPackage biblatex Info: Reference section=0 on input line 305.\nPackage biblatex Info: Reference segment=0 on input line 305.\n*geometry* driver: auto-detecting\n*geometry* detected driver: xetex\n*geometry* verbose mode - [ preamble ] result:\n* driver: xetex\n* paper: a4paper\n* layout: <same size as paper>\n* layoutoffset:(h,v)=(0.0pt,0.0pt)\n* modes: includemp \n* h-part:(L,W,R)=(28.45274pt, 526.376pt, 42.67912pt)\n* v-part:(T,H,B)=(99.58464pt, 660.10394pt, 85.35826pt)\n* \\paperwidth=597.50787pt\n* \\paperheight=845.04684pt\n* \\textwidth=387.33861pt\n* \\textheight=660.10394pt\n* \\oddsidemargin=95.22015pt\n* \\evensidemargin=95.22015pt\n* \\topmargin=-60.28131pt\n* \\headheight=62.59596pt\n* \\headsep=25.0pt\n* \\topskip=10.0pt\n* \\footskip=30.0pt\n* \\marginparwidth=128.0374pt\n* \\marginparsep=11.0pt\n* \\columnsep=10.0pt\n* \\skip\\footins=9.0pt plus 4.0pt minus 2.0pt\n* \\hoffset=0.0pt\n* \\voffset=0.0pt\n* \\mag=1000\n* \\@twocolumnfalse\n* \\@twosidefalse\n* \\@mparswitchfalse\n* \\@reversemargintrue\n* (1in=72.27pt=25.4mm, 1cm=28.453pt)\n\nLaTeX Font Info:    Trying to load font information for U+msa on input line 306.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsa.fd\nFile: umsa.fd 2013/01/14 v3.01 AMS symbols A\n)\nLaTeX Font Info:    Trying to load font information for U+msb on input line 306.\n(/Users/indrajeetpatil/Library/TinyTeX/texmf-dist/tex/latex/amsfonts/umsb.fd\nFile: 
umsb.fd 2013/01/14 v3.01 AMS symbols B\n)\n\nPackage hyperref Warning: Suppressing link with empty target on input line 332.\n\n\nPackage hyperref Warning: Suppressing link with empty target on input line 332.\n\n\nPackage hyperref Warning: Suppressing link with empty target on input line 332.\n\nFile: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)\n</Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png>\n\nPackage fancyhdr Warning: \\headheight is too small (62.59596pt): \n(fancyhdr)                Make it at least 63.55022pt, for example:\n(fancyhdr)                \\setlength{\\headheight}{63.55022pt}.\n(fancyhdr)                You might also make \\topmargin smaller to compensate:\n(fancyhdr)                \\addtolength{\\topmargin}{-0.95425pt}.\n\nLaTeX Font Info:    Font shape `TU/lmss/m/it' in size <8> not available\n(Font)              Font shape `TU/lmss/m/sl' tried instead on input line 393.\n[1\n\n]\nFile: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)\n</Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png>\n\nPackage fancyhdr Warning: \\headheight is too small (62.59596pt): \n(fancyhdr)                Make it at least 63.55022pt, for example:\n(fancyhdr)                \\setlength{\\headheight}{63.55022pt}.\n(fancyhdr)                You might also make \\topmargin smaller to compensate:\n(fancyhdr)                \\addtolength{\\topmargin}{-0.95425pt}.\n\n[2]\nUnderfull \\hbox (badness 1448) in paragraph at lines 529--533\n\\TU/lmr/m/n/10 The workhorse function to get a comprehensive summary of data properties is\n []\n\nFile: 
/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)\n</Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png>\n\nPackage fancyhdr Warning: \\headheight is too small (62.59596pt): \n(fancyhdr)                Make it at least 63.55022pt, for example:\n(fancyhdr)                \\setlength{\\headheight}{63.55022pt}.\n(fancyhdr)                You might also make \\topmargin smaller to compensate:\n(fancyhdr)                \\addtolength{\\topmargin}{-0.95425pt}.\n\n[3]\nOverfull \\hbox (25.8514pt too wide) in paragraph at lines 540--557\n[][] \n []\n\nFile: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)\n</Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png>\n\nPackage fancyhdr Warning: \\headheight is too small (62.59596pt): \n(fancyhdr)                Make it at least 63.55022pt, for example:\n(fancyhdr)                \\setlength{\\headheight}{63.55022pt}.\n(fancyhdr)                You might also make \\topmargin smaller to compensate:\n(fancyhdr)                \\addtolength{\\topmargin}{-0.95425pt}.\n\n[4]\nFile: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png Graphic file (type bmp)\n</Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library/rticles/rmarkdown/templates/joss/resources/JOSS-logo.png>\n\nPackage fancyhdr Warning: \\headheight is too small (62.59596pt): \n(fancyhdr)                Make it at least 63.55022pt, for example:\n(fancyhdr)                \\setlength{\\headheight}{63.55022pt}.\n(fancyhdr)                You might also make \\topmargin smaller to compensate:\n(fancyhdr)                
\\addtolength{\\topmargin}{-0.95425pt}.\n\n[5] (./paper.aux)\nPackage rerunfilecheck Info: File `paper.out' has not changed.\n(rerunfilecheck)             Checksum: BFFBA38CB5FAA9461C3119C411944BB9;1328.\nPackage logreq Info: Writing requests to 'paper.run.xml'.\n\\openout1 = `paper.run.xml'.\n\n ) \nHere is how much of TeX's memory you used:\n 36661 strings out of 477747\n 751496 string characters out of 5842604\n 1543570 words of memory out of 5000000\n 57125 multiletter control sequences out of 15000+600000\n 477790 words of font info for 84 fonts, out of 8000000 for 9000\n 14 hyphenation exceptions out of 8191\n 84i,13n,81p,678b,848s stack positions out of 10000i,1000n,20000p,200000b,200000s\n\nOutput written on paper.pdf (5 pages).\n"
  },
  {
    "path": "paper/JOSS_files/paper.md",
    "content": "---\ntitle: \"datawizard: An R Package for Easy Data Preparation and Statistical Transformations\"\ntags:\n  - R\n  - easystats\nauthors:\n- affiliation: 1\n  name: Indrajeet Patil\n  orcid: 0000-0003-1995-6531\n- affiliation: 2\n  name: Dominique Makowski\n  orcid: 0000-0001-5375-9967\n- affiliation: 3\n  name: Mattan S. Ben-Shachar\n  orcid: 0000-0002-4287-4801\n- affiliation: 4\n  name: Brenton M. Wiernik^[Brenton Wiernik is currently an independent researcher and Research Scientist at Meta, Demography and Survey Science. The current work was done in an independent capacity.]\n  orcid: 0000-0001-9560-6336\n- affiliation: 5\n  name: Etienne Bacher\n  orcid: 0000-0002-9271-5075 \n- affiliation: 6\n  name: Daniel Lüdecke\n  orcid: 0000-0002-8895-3206\n  \naffiliations:\n- index: 1\n  name: cynkra Analytics GmbH, Germany\n- index: 2\n  name: Nanyang Technological University, Singapore\n- index: 3\n  name: Ben-Gurion University of the Negev, Israel\n- index: 4\n  name: Independent Researcher\n- index: 5\n  name: Luxembourg Institute of Socio-Economic Research (LISER), Luxembourg\n- index: 6\n  name: University Medical Center Hamburg-Eppendorf, Germany\n    \ndate: \"2022-10-04\"\nbibliography: paper.bib\noutput: rticles::joss_article\ncsl: apa.csl\njournal: JOSS\nlink-citations: yes\n---\n\n\n\n# Summary\n\nThe `{datawizard}` package for the R programming language [@base2021] provides a lightweight toolbox to assist in key steps involved in any data analysis workflow: (1) wrangling the raw data to get it in the needed form, (2) applying preprocessing steps and statistical transformations, and (3) compute statistical summaries of data properties and distributions. 
Therefore, it can be a valuable tool for R users and developers looking for a lightweight option for data preparation.\n\n# Statement of Need\n\nThe `{datawizard}` package is part of `{easystats}`, a collection of R packages designed to make statistical analysis easier (@Ben-Shachar2020, @Lüdecke2020parameters, @Lüdecke2020performance, @Lüdecke2021see, @Lüdecke2019, @Makowski2019, @Makowski2020). As this ecosystem follows a \"0-external-hard-dependency\" policy, a data manipulation package that relies only on base R needed to be created. In effect, `{datawizard}` provides a data processing backend for this entire ecosystem. \nIn addition to its usefulness to the `{easystats}` ecosystem, it also provides *an* option for R users and package developers if they wish to keep their (recursive) dependency weight to a minimum (for other options, see @Dowle2021, @Eastwood2021).\n\nBecause `{datawizard}` is also meant to be used and adopted easily by a wide range of users, its workflow and syntax are designed to be similar to `{tidyverse}` [@Wickham2019], a widely used ecosystem of R packages. Thus, users familiar with the `{tidyverse}` can easily translate their knowledge and make full use of `{datawizard}`.\n\nIn addition to being a lightweight solution to clean messy data, `{datawizard}` also provides helpers for the other important step of data analysis: applying statistical transformations to the cleaned data while setting up statistical models. This includes various types of data standardization, normalization, rank-transformation, and adjustment. These transformations, although widely used, are not currently collectively implemented in a package in the R ecosystem, so `{datawizard}` can help new R users in finding the transformation they need.\n\nLastly, `{datawizard}` also provides a toolbox to create detailed summaries of data properties and distributions (e.g., tables of descriptive statistics for each variable). 
This is a common step in data analysis, but it is not available in base R or many modeling packages, so its inclusion makes `{datawizard}` a one-stop-shop for data preparation tasks.\n\n# Features\n\n## Data Preparation\n\nThe raw data is rarely in a state that it can be directly fed into a statistical model. It often needs to be modified in various ways. For example, columns need to be renamed or reshaped, certain portions of the data need to be filtered out, data scattered across multiple tables needs to be joined, etc. \n\n`{datawizard}` provides various functions for cleaning and preparing data (see Table 1).\n\n| Function         | Operation                             |\n| :--------------- | :------------------------------------ |\n| `data_filter()`  | to select only certain *observations* |\n| `data_select()`  | to select only certain *variables*    |\n| `data_extract()` | to extract a single *variable*        |\n| `data_rename()`  | to rename variables                   |\n| `data_to_long()` | to convert data from wide to long     |\n| `data_to_wide()` | to convert data from long to wide     |\n| `data_join()`    | to join two data frames               |\n| ...              | ...                                   |\n\nTable: The table below lists a few key functions offered by `{datawizard}` for data wrangling. 
To see the full list, see the package website: <https://easystats.github.io/datawizard/>\n\nWe will look at one example function that converts data in wide format to tidy/long format:\n\n\n```r\nstocks <- data.frame(\n  time = as.Date(\"2009-01-01\") + 0:4,\n  X = rnorm(5, 0, 1),\n  Y = rnorm(5, 0, 2)\n)\n\nstocks\n#>         time           X          Y\n#> 1 2009-01-01 -0.91474184 -0.5654808\n#> 2 2009-01-02  1.00124785 -1.5270177\n#> 3 2009-01-03 -0.05642291 -1.3700199\n#> 4 2009-01-04  0.29664516  0.7341479\n#> 5 2009-01-05 -2.79147086  0.3659937\n\ndata_to_long(\n  stocks,\n  select = -c(\"time\"),\n  names_to = \"stock\",\n  values_to = \"price\"\n)\n#>          time stock       price\n#> 1  2009-01-01     X -0.91474184\n#> 2  2009-01-01     Y -0.56548082\n#> 3  2009-01-02     X  1.00124785\n#> 4  2009-01-02     Y -1.52701766\n#> 5  2009-01-03     X -0.05642291\n#> 6  2009-01-03     Y -1.37001987\n#> 7  2009-01-04     X  0.29664516\n#> 8  2009-01-04     Y  0.73414790\n#> 9  2009-01-05     X -2.79147086\n#> 10 2009-01-05     Y  0.36599370\n```\n\n## Statistical Transformations\n\nEven after getting the raw data in the needed format, we may need to transform certain variables further to meet requirements imposed by a statistical test.\n\n`{datawizard}` provides a rich collection of such functions for transforming variables (see Table 2).\n\n| Function          | Operation                                    |\n| :---------------- | :------------------------------------------- |\n| `standardize()`   | to center and scale data                     |\n| `normalize()`     | to scale variables to 0-1 range              |\n| `adjust()`        | to adjust data for effect of other variables |\n| `slide()`         | to shift numeric value range                 |\n| `ranktransform()` | to convert numeric values to integer ranks   |\n| ...               | ...                                          
|\n\nTable: The table below lists a few key functions offered by `{datawizard}` for data transformations. To see the full list, see the package website: <https://easystats.github.io/datawizard/>\n\nWe will look at one example function that standardizes (i.e. centers and scales) data so that it can be expressed in terms of standard deviation:\n\n\n```r\nd <- data.frame(\n  a = c(-2, -1, 0, 1, 2),\n  b = c(3, 4, 5, 6, 7)\n)\n\nstandardize(d, center = c(3, 4), scale = c(2, 4))\n#>      a     b\n#> 1 -2.5 -0.25\n#> 2 -2.0  0.00\n#> 3 -1.5  0.25\n#> 4 -1.0  0.50\n#> 5 -0.5  0.75\n```\n\n## Summaries of Data Properties and Distributions\n\nThe workhorse function to get a comprehensive summary of data properties is `describe_distribution()`, which combines a set of indices (e.g., measures of centrality, dispersion, range, skewness, kurtosis, etc.) computed by other functions in `{datawizard}`.\n\n\n```r\ndescribe_distribution(mtcars)\n```\n\n\n\\begin{tabular}[t]{lrrrrrrrrr}\n\\toprule\nVariable & Mean & SD & IQR & Min & Max & Skewness & Kurtosis & n & n\\_Missing\\\\\n\\midrule\nmpg & 20.09 & 6.03 & 7.53 & 10.4 & 33.9 & 0.67 & -0.02 & 32 & 0\\\\\ncyl & 6.19 & 1.79 & 4.00 & 4.0 & 8.0 & -0.19 & -1.76 & 32 & 0\\\\\ndisp & 230.72 & 123.94 & 221.52 & 71.1 & 472.0 & 0.42 & -1.07 & 32 & 0\\\\\nhp & 146.69 & 68.56 & 84.50 & 52.0 & 335.0 & 0.80 & 0.28 & 32 & 0\\\\\ndrat & 3.60 & 0.53 & 0.84 & 2.8 & 4.9 & 0.29 & -0.45 & 32 & 0\\\\\nwt & 3.22 & 0.98 & 1.19 & 1.5 & 5.4 & 0.47 & 0.42 & 32 & 0\\\\\nqsec & 17.85 & 1.79 & 2.02 & 14.5 & 22.9 & 0.41 & 0.86 & 32 & 0\\\\\nvs & 0.44 & 0.50 & 1.00 & 0.0 & 1.0 & 0.26 & -2.06 & 32 & 0\\\\\nam & 0.41 & 0.50 & 1.00 & 0.0 & 1.0 & 0.40 & -1.97 & 32 & 0\\\\\ngear & 3.69 & 0.74 & 1.00 & 3.0 & 5.0 & 0.58 & -0.90 & 32 & 0\\\\\ncarb & 2.81 & 1.62 & 2.00 & 1.0 & 8.0 & 1.16 & 2.02 & 32 & 0\\\\\n\\bottomrule\n\\end{tabular}\n\n# Licensing and Availability\n\n`{datawizard}` is licensed under the GNU General Public License (v3.0), with all source code openly 
developed and stored on GitHub (<https://github.com/easystats/datawizard>), along with a corresponding issue tracker for bug reporting and feature enhancements. In the spirit of honest and open science, we encourage requests, tips for fixes, feature updates, as well as general questions and concerns via direct interaction with contributors and developers.\n\n# Acknowledgments\n\n`{datawizard}` is part of the collaborative [*easystats*](https://easystats.github.io/easystats/) ecosystem. Thus, we thank the [members of easystats](https://github.com/orgs/easystats/people) as well as the users.\n\n# References\n"
  },
  {
    "path": "pkgdown/_pkgdown.yaml",
    "content": "url: https://easystats.github.io/datawizard/\n\ntemplate:\n  bootstrap: 5\n  package: easystatstemplate\n\nreference:\n  - title: Data Preparation\n    desc: |\n      Main functions for cleaning and preparing data\n    contents:\n      - data_to_long\n      - data_to_wide\n      - data_extract\n      - data_filter\n      - data_select\n      - data_reorder\n      - data_arrange\n      - data_merge\n      - data_partition\n      - data_rotate\n      - data_group\n      - data_replicate\n      - data_duplicated\n      - data_unique\n\n  - title: Data and Variable Transformations\n  - subtitle: Statistical Transformations\n    desc: |\n      Functions for transforming variables\n    contents:\n      - data_modify\n      - data_separate\n      - data_unite\n      - categorize\n      - recode_into\n      - recode_values\n      - adjust\n      - demean\n      - ranktransform\n      - rescale_weights\n      - winsorize\n  - subtitle: \"Linear Transformers\"\n    desc: |\n      Convenient functions for common linear transformations\n    contents:\n      - center\n      - slide\n      - standardize\n      - standardize.default\n      - reverse\n      - rescale\n      - normalize\n      - unstandardize\n      - makepredictcall.dw_transformer\n  - subtitle: \"Others\"\n    contents:\n    - contr.deviation\n\n  - title: Data Properties\n    desc: |\n      Functions to compute statistical summaries of data properties and distributions\n    contents:\n      - as.prop.table\n      - data_codebook\n      - data_summary\n      - data_tabulate\n      - data_peek\n      - data_seek\n      - means_by_group\n      - contains(\"distribution\")\n      - kurtosis\n      - smoothness\n      - skewness\n      - row_count\n      - row_means\n      - weighted_mean\n      - mean_sd\n\n  - title: Convert and Replace Data\n    desc: |\n      Helpers for data replacements\n    contents:\n      - assign_labels\n      - labels_to_levels\n      - contains(\"to_numeric\")\n      - 
to_factor\n      - starts_with(\"replace_\")\n      - starts_with(\"convert_\")\n\n  - title: Import data\n    desc: |\n      Helpers for importing data\n    contents:\n      - data_read\n\n  - title: Helpers for Data Preparation\n    desc: |\n      Primarily useful in the context of other 'easystats' packages\n    contents:\n      - reshape_ci\n      - data_rename\n      - data_addprefix\n      - remove_empty\n      - contains(\"rownames\")\n      - rowid_as_column\n      - contains(\"colnames\")\n      - extract_column_names\n      - data_restoretype\n\n  - title: Helpers for Text Formatting\n    desc: |\n      Primarily useful for 'report' package\n    contents:\n      - starts_with(\"text_\")\n\n  - title: Visualization helpers\n    desc: |\n      Primarily useful in the context of other 'easystats' packages\n    contents:\n      - visualisation_recipe\n\n  - title: Data\n    desc: |\n      Datasets useful for examples and tests\n    contents:\n      - efc\n      - nhanes_sample\n\narticles:\n  - title: Overview of vignettes\n    navbar: ~\n    contents:\n      - overview_of_vignettes\n\n  - title: Data Preparation\n    desc: |\n      Articles explaining utility of 'datawizard' for data wrangling\n    navbar: ~\n    contents:\n      - tidyverse_translation\n      - selection_syntax\n\n  - title: Statistical Transformations\n    desc: |\n      Articles describing use of 'datawizard' functions for transforming data\n      to use in regression models\n    navbar: ~\n    contents:\n      - standardize_data\n"
  },
  {
    "path": "tests/testthat/_snaps/categorize.md",
    "content": "# categorize labelling ranged\n\n    Code\n      categorize(mtcars$mpg, \"equal_length\", n_groups = 5)\n    Output\n       [1] 3 3 3 3 2 2 1 3 3 2 2 2 2 2 1 1 1 5 5 5 3 2 2 1 2 4 4 5 2 2 1 3\n\n---\n\n    Code\n      categorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"range\")\n    Output\n       [1] [19.8,24.5) [19.8,24.5) [19.8,24.5) [19.8,24.5) [15.1,19.8) [15.1,19.8)\n       [7] [10.4,15.1) [19.8,24.5) [19.8,24.5) [15.1,19.8) [15.1,19.8) [15.1,19.8)\n      [13] [15.1,19.8) [15.1,19.8) [10.4,15.1) [10.4,15.1) [10.4,15.1) [29.2,33.9]\n      [19] [29.2,33.9] [29.2,33.9] [19.8,24.5) [15.1,19.8) [15.1,19.8) [10.4,15.1)\n      [25] [15.1,19.8) [24.5,29.2) [24.5,29.2) [29.2,33.9] [15.1,19.8) [15.1,19.8)\n      [31] [10.4,15.1) [19.8,24.5)\n      Levels: [10.4,15.1) [15.1,19.8) [19.8,24.5) [24.5,29.2) [29.2,33.9]\n\n---\n\n    Code\n      categorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"observed\")\n    Output\n       [1] (21-24.4)   (21-24.4)   (21-24.4)   (21-24.4)   (15.2-19.7) (15.2-19.7)\n       [7] (10.4-15)   (21-24.4)   (21-24.4)   (15.2-19.7) (15.2-19.7) (15.2-19.7)\n      [13] (15.2-19.7) (15.2-19.7) (10.4-15)   (10.4-15)   (10.4-15)   (30.4-33.9)\n      [19] (30.4-33.9) (30.4-33.9) (21-24.4)   (15.2-19.7) (15.2-19.7) (10.4-15)  \n      [25] (15.2-19.7) (26-27.3)   (26-27.3)   (30.4-33.9) (15.2-19.7) (15.2-19.7)\n      [31] (10.4-15)   (21-24.4)  \n      Levels: (10.4-15) (15.2-19.7) (21-24.4) (26-27.3) (30.4-33.9)\n\n# categorize breaks\n\n    Code\n      categorize(mtcars$mpg, \"equal_length\", n_groups = 5, labels = \"range\", breaks = \"inclusive\")\n    Output\n       [1] (19.8,24.5] (19.8,24.5] (19.8,24.5] (19.8,24.5] (15.1,19.8] (15.1,19.8]\n       [7] [10.4,15.1] (19.8,24.5] (19.8,24.5] (15.1,19.8] (15.1,19.8] (15.1,19.8]\n      [13] (15.1,19.8] (15.1,19.8] [10.4,15.1] [10.4,15.1] [10.4,15.1] (29.2,33.9]\n      [19] (29.2,33.9] (29.2,33.9] (19.8,24.5] (15.1,19.8] (15.1,19.8] [10.4,15.1]\n      [25] 
(15.1,19.8] (24.5,29.2] (24.5,29.2] (29.2,33.9] (15.1,19.8] (15.1,19.8]\n      [31] [10.4,15.1] (19.8,24.5]\n      Levels: [10.4,15.1] (15.1,19.8] (19.8,24.5] (24.5,29.2] (29.2,33.9]\n\n"
  },
  {
    "path": "tests/testthat/_snaps/contr.deviation.md",
    "content": "# contr.deviation | snapshot\n\n    Code\n      solve(c.deviation)\n    Output\n                         4         6         8\n      Intercept  0.3333333 0.3333333 0.3333333\n      6         -1.0000000 1.0000000 0.0000000\n      8         -1.0000000 0.0000000 1.0000000\n\n---\n\n    Code\n      solve(mm)\n    Output\n                    cyl4.am0   cyl4.am1   cyl6.am0  cyl6.am1   cyl8.am0  cyl8.am1\n      (Intercept)  0.3333333  0.0000000  0.3333333 0.0000000  0.3333333 0.0000000\n      cyl6        -1.0000000  0.0000000  1.0000000 0.0000000  0.0000000 0.0000000\n      cyl8        -1.0000000  0.0000000  0.0000000 0.0000000  1.0000000 0.0000000\n      am1         -0.3333333  0.3333333 -0.3333333 0.3333333 -0.3333333 0.3333333\n      cyl6:am1     1.0000000 -1.0000000 -1.0000000 1.0000000  0.0000000 0.0000000\n      cyl8:am1     1.0000000 -1.0000000  0.0000000 0.0000000 -1.0000000 1.0000000\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_codebook.md",
    "content": "# data_codebook iris\n\n    Code\n      data_codebook(iris)\n    Output\n      iris (150 rows and 5 variables, 5 shown)\n      \n      ID | Name         | Type        | Missings |     Values |          N\n      ---+--------------+-------------+----------+------------+-----------\n      1  | Sepal.Length | numeric     | 0 (0.0%) | [4.3, 7.9] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      2  | Sepal.Width  | numeric     | 0 (0.0%) |   [2, 4.4] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      3  | Petal.Length | numeric     | 0 (0.0%) |   [1, 6.9] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      4  | Petal.Width  | numeric     | 0 (0.0%) | [0.1, 2.5] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      5  | Species      | categorical | 0 (0.0%) |     setosa | 50 (33.3%)\n         |              |             |          | versicolor | 50 (33.3%)\n         |              |             |          |  virginica | 50 (33.3%)\n      --------------------------------------------------------------------\n\n# data_codebook iris, reordered\n\n    Code\n      data_codebook(iris[c(1, 2, 5, 3, 4)])\n    Output\n      iris[c(1, 2, 5, 3, 4)] (150 rows and 5 variables, 5 shown)\n      \n      ID | Name         | Type        | Missings |     Values |          N\n      ---+--------------+-------------+----------+------------+-----------\n      1  | Sepal.Length | numeric     | 0 (0.0%) | [4.3, 7.9] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      2  | Sepal.Width  | numeric     | 0 (0.0%) |   [2, 4.4] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      3  | Species      | categorical | 0 (0.0%) |     setosa | 50 (33.3%)\n         |              |             |          | versicolor | 50 (33.3%)\n     
    |              |             |          |  virginica | 50 (33.3%)\n      ---+--------------+-------------+----------+------------+-----------\n      4  | Petal.Length | numeric     | 0 (0.0%) |   [1, 6.9] |        150\n      ---+--------------+-------------+----------+------------+-----------\n      5  | Petal.Width  | numeric     | 0 (0.0%) | [0.1, 2.5] |        150\n      --------------------------------------------------------------------\n\n# data_codebook NaN and Inf\n\n    Code\n      data_codebook(d)\n    Output\n      d (9 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    |  Missings | Values |         N\n      ---+------+---------+-----------+--------+----------\n      1  | x    | numeric | 2 (22.2%) |      1 | 3 (42.9%)\n         |      |         |           |      2 | 1 (14.3%)\n         |      |         |           |      4 | 2 (28.6%)\n         |      |         |           |    Inf | 1 (14.3%)\n      ----------------------------------------------------\n\n---\n\n    Code\n      data_codebook(d)\n    Output\n      d (102 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    | Missings |  Values |           N\n      ---+------+---------+----------+---------+------------\n      1  | x    | numeric | 0 (0.0%) | [1, 15] | 102 (98.1%)\n         |      |         |          |     Inf |   2 ( 1.9%)\n      ------------------------------------------------------\n\n---\n\n    Code\n      data_codebook(d, range_at = 100)\n    Output\n      d (102 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    | Missings | Values |          N\n      ---+------+---------+----------+--------+-----------\n      1  | x    | numeric | 0 (0.0%) |      1 |  4 ( 4.0%)\n         |      |         |          |      2 |  5 ( 5.0%)\n         |      |         |          |      3 |  6 ( 6.0%)\n         |      |         |          |      4 |  5 ( 5.0%)\n         |      |         |          |      5 |  8 ( 8.0%)\n         |      |         |          
|      6 | 10 (10.0%)\n         |      |         |          |      7 |  6 ( 6.0%)\n         |      |         |          |      8 |  3 ( 3.0%)\n         |      |         |          |      9 | 13 (13.0%)\n         |      |         |          |     10 |  7 ( 7.0%)\n         |      |         |          |  (...) |           \n      ----------------------------------------------------\n\n---\n\n    Code\n      data_codebook(d, range_at = 100, max_values = 4)\n    Output\n      d (102 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    | Missings | Values |        N\n      ---+------+---------+----------+--------+---------\n      1  | x    | numeric | 0 (0.0%) |      1 | 4 (4.0%)\n         |      |         |          |      2 | 5 (5.0%)\n         |      |         |          |      3 | 6 (6.0%)\n         |      |         |          |      4 | 5 (5.0%)\n         |      |         |          |  (...) |         \n      --------------------------------------------------\n\n# data_codebook, tinytable\n\n    Code\n      display(data_codebook(d), format = \"tt\")\n    Output\n      \n      +----+------+---------+-----------+--------+-----------+\n      | ID | Name | Type    | Missings  | Values | N         |\n      +====+======+=========+===========+========+===========+\n      | 1  | x    | numeric | 2 (22.2%) | 1      | 3 (42.9%) |\n      +----+------+---------+-----------+--------+-----------+\n      |    |      |         |           | 2      | 1 (14.3%) |\n      +----+------+---------+-----------+--------+-----------+\n      |    |      |         |           | 4      | 2 (28.6%) |\n      +----+------+---------+-----------+--------+-----------+\n      |    |      |         |           | Inf    | 1 (14.3%) |\n      +----+------+---------+-----------+--------+-----------+\n      \n      Table: d (9 rows and 1 variables, 1 shown) \n\n---\n\n    Code\n      display(data_codebook(d), format = \"tt\")\n    Output\n      \n      
+----+------+---------+----------+---------+-------------+\n      | ID | Name | Type    | Missings | Values  | N           |\n      +====+======+=========+==========+=========+=============+\n      | 1  | x    | numeric | 0 (0.0%) | [1, 15] | 102 (98.1%) |\n      +----+------+---------+----------+---------+-------------+\n      |    |      |         |          | Inf     | 2 (1.9%)    |\n      +----+------+---------+----------+---------+-------------+\n      \n      Table: d (102 rows and 1 variables, 1 shown) \n\n---\n\n    Code\n      display(data_codebook(d, range_at = 100), format = \"tt\")\n    Output\n      \n      +----+------+---------+----------+--------+------------+\n      | ID | Name | Type    | Missings | Values | N          |\n      +====+======+=========+==========+========+============+\n      | 1  | x    | numeric | 0 (0.0%) | 1      | 4 (4.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 2      | 5 (5.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 3      | 6 (6.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 4      | 5 (5.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 5      | 8 (8.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 6      | 10 (10.0%) |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 7      | 6 (6.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 8      | 3 (3.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | 9      | 13 (13.0%) |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |     
     | 10     | 7 (7.0%)   |\n      +----+------+---------+----------+--------+------------+\n      |    |      |         |          | (...)  |            |\n      +----+------+---------+----------+--------+------------+\n      \n      Table: d (102 rows and 1 variables, 1 shown) \n\n---\n\n    Code\n      display(data_codebook(d, range_at = 100, max_values = 4), format = \"tt\")\n    Output\n      \n      +----+------+---------+----------+--------+----------+\n      | ID | Name | Type    | Missings | Values | N        |\n      +====+======+=========+==========+========+==========+\n      | 1  | x    | numeric | 0 (0.0%) | 1      | 4 (4.0%) |\n      +----+------+---------+----------+--------+----------+\n      |    |      |         |          | 2      | 5 (5.0%) |\n      +----+------+---------+----------+--------+----------+\n      |    |      |         |          | 3      | 6 (6.0%) |\n      +----+------+---------+----------+--------+----------+\n      |    |      |         |          | 4      | 5 (5.0%) |\n      +----+------+---------+----------+--------+----------+\n      |    |      |         |          | (...)  
|          |\n      +----+------+---------+----------+--------+----------+\n      \n      Table: d (102 rows and 1 variables, 1 shown) \n\n---\n\n    Code\n      display(data_codebook(iris[c(1, 2, 5, 3, 4)]), format = \"tt\")\n    Output\n      \n      +----+--------------+-------------+----------+------------+------------+\n      | ID | Name         | Type        | Missings | Values     | N          |\n      +====+==============+=============+==========+============+============+\n      | 1  | Sepal.Length | numeric     | 0 (0.0%) | [4.3, 7.9] | 150        |\n      +----+--------------+-------------+----------+------------+------------+\n      | 2  | Sepal.Width  | numeric     | 0 (0.0%) | [2, 4.4]   | 150        |\n      +----+--------------+-------------+----------+------------+------------+\n      | 3  | Species      | categorical | 0 (0.0%) | setosa     | 50 (33.3%) |\n      +----+--------------+-------------+----------+------------+------------+\n      |    |              |             |          | versicolor | 50 (33.3%) |\n      +----+--------------+-------------+----------+------------+------------+\n      |    |              |             |          | virginica  | 50 (33.3%) |\n      +----+--------------+-------------+----------+------------+------------+\n      | 4  | Petal.Length | numeric     | 0 (0.0%) | [1, 6.9]   | 150        |\n      +----+--------------+-------------+----------+------------+------------+\n      | 5  | Petal.Width  | numeric     | 0 (0.0%) | [0.1, 2.5] | 150        |\n      +----+--------------+-------------+----------+------------+------------+\n      \n      Table: iris[c(1, 2, 5, 3, 4)] (150 rows and 5 variables, 5 shown) \n\n# data_codebook iris, select\n\n    Code\n      data_codebook(iris, select = starts_with(\"Sepal\"))\n    Output\n      iris (150 rows and 5 variables, 2 shown)\n      \n      ID | Name         | Type    | Missings |     Values |   N\n      ---+--------------+---------+----------+------------+----\n      1  
| Sepal.Length | numeric | 0 (0.0%) | [4.3, 7.9] | 150\n      ---+--------------+---------+----------+------------+----\n      2  | Sepal.Width  | numeric | 0 (0.0%) |   [2, 4.4] | 150\n      ---------------------------------------------------------\n\n# data_codebook iris, select, ID\n\n    Code\n      data_codebook(iris, select = starts_with(\"Petal\"))\n    Output\n      iris (150 rows and 5 variables, 2 shown)\n      \n      ID | Name         | Type    | Missings |     Values |   N\n      ---+--------------+---------+----------+------------+----\n      3  | Petal.Length | numeric | 0 (0.0%) |   [1, 6.9] | 150\n      ---+--------------+---------+----------+------------+----\n      4  | Petal.Width  | numeric | 0 (0.0%) | [0.1, 2.5] | 150\n      ---------------------------------------------------------\n\n# data_codebook efc\n\n    Code\n      print(data_codebook(efc), table_width = Inf)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                                    | Type        |   Missings |   Values | Value Labels                    |          N\n      ---+----------+------------------------------------------+-------------+------------+----------+---------------------------------+-----------\n      1  | c12hour  | average number of hours of care per week | numeric     |   2 (2.0%) | [5, 168] |                                 |         98\n      ---+----------+------------------------------------------+-------------+------------+----------+---------------------------------+-----------\n      2  | e16sex   | elder's gender                           | numeric     |   0 (0.0%) |        1 | male                            | 46 (46.0%)\n         |          |                                          |             |            |        2 | female                          | 54 (54.0%)\n      
---+----------+------------------------------------------+-------------+------------+----------+---------------------------------+-----------\n      3  | e42dep   | elder's dependency                       | categorical |   3 (3.0%) |        1 | independent                     |  2 ( 2.1%)\n         |          |                                          |             |            |        2 | slightly dependent              |  4 ( 4.1%)\n         |          |                                          |             |            |        3 | moderately dependent            | 28 (28.9%)\n         |          |                                          |             |            |        4 | severely dependent              | 63 (64.9%)\n      ---+----------+------------------------------------------+-------------+------------+----------+---------------------------------+-----------\n      4  | c172code | carer's level of education               | numeric     | 10 (10.0%) |        1 | low level of education          |  8 ( 8.9%)\n         |          |                                          |             |            |        2 | intermediate level of education | 66 (73.3%)\n         |          |                                          |             |            |        3 | high level of education         | 16 (17.8%)\n      ---+----------+------------------------------------------+-------------+------------+----------+---------------------------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items             | numeric     |   3 (3.0%) |  [7, 28] |                                 |         97\n      ---------------------------------------------------------------------------------------------------------------------------------------------\n\n---\n\n    Code\n      print(data_codebook(efc), table_width = \"auto\", remove_duplicates = FALSE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                     
               | Type       \n      ---+----------+------------------------------------------+------------\n      1  | c12hour  | average number of hours of care per week | numeric    \n      ---+----------+------------------------------------------+------------\n      2  | e16sex   | elder's gender                           | numeric    \n         |          |                                          |            \n      ---+----------+------------------------------------------+------------\n      3  | e42dep   | elder's dependency                       | categorical\n         |          |                                          |            \n         |          |                                          |            \n         |          |                                          |            \n      ---+----------+------------------------------------------+------------\n      4  | c172code | carer's level of education               | numeric    \n         |          |                                          |            \n         |          |                                          |            \n      ---+----------+------------------------------------------+------------\n      5  | neg_c_7  | Negative impact with 7 items             | numeric    \n      ----------------------------------------------------------------------\n      \n      ID |   Missings |   Values | Value Labels                    |          N\n      ---+------------+----------+---------------------------------+-----------\n      1  |   2 (2.0%) | [5, 168] |                                 |         98\n      ---+------------+----------+---------------------------------+-----------\n      2  |   0 (0.0%) |        1 | male                            | 46 (46.0%)\n         |            |        2 | female                          | 54 (54.0%)\n      ---+------------+----------+---------------------------------+-----------\n      3  |   3 (3.0%) |        1 | independent                     | 
 2 ( 2.1%)\n         |            |        2 | slightly dependent              |  4 ( 4.1%)\n         |            |        3 | moderately dependent            | 28 (28.9%)\n         |            |        4 | severely dependent              | 63 (64.9%)\n      ---+------------+----------+---------------------------------+-----------\n      4  | 10 (10.0%) |        1 | low level of education          |  8 ( 8.9%)\n         |            |        2 | intermediate level of education | 66 (73.3%)\n         |            |        3 | high level of education         | 16 (17.8%)\n      ---+------------+----------+---------------------------------+-----------\n      5  |   3 (3.0%) |  [7, 28] |                                 |         97\n      -------------------------------------------------------------------------\n\n---\n\n    Code\n      print(data_codebook(efc), table_width = \"auto\", remove_duplicates = TRUE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                                    | Type       \n      ---+----------+------------------------------------------+------------\n      1  | c12hour  | average number of hours of care per week | numeric    \n      ---+----------+------------------------------------------+------------\n      2  | e16sex   | elder's gender                           | numeric    \n      ---+----------+------------------------------------------+------------\n      3  | e42dep   | elder's dependency                       | categorical\n      ---+----------+------------------------------------------+------------\n      4  | c172code | carer's level of education               | numeric    \n      ---+----------+------------------------------------------+------------\n      5  | neg_c_7  | Negative impact with 7 items             | numeric    \n      ----------------------------------------------------------------------\n      \n      ID |   Missings |   Values | Value Labels                  
  |          N\n      ---+------------+----------+---------------------------------+-----------\n      1  |   2 (2.0%) | [5, 168] |                                 |         98\n      ---+------------+----------+---------------------------------+-----------\n      2  |   0 (0.0%) |        1 | male                            | 46 (46.0%)\n         |            |        2 | female                          | 54 (54.0%)\n      ---+------------+----------+---------------------------------+-----------\n      3  |   3 (3.0%) |        1 | independent                     |  2 ( 2.1%)\n         |            |        2 | slightly dependent              |  4 ( 4.1%)\n         |            |        3 | moderately dependent            | 28 (28.9%)\n         |            |        4 | severely dependent              | 63 (64.9%)\n      ---+------------+----------+---------------------------------+-----------\n      4  | 10 (10.0%) |        1 | low level of education          |  8 ( 8.9%)\n         |            |        2 | intermediate level of education | 66 (73.3%)\n         |            |        3 | high level of education         | 16 (17.8%)\n      ---+------------+----------+---------------------------------+-----------\n      5  |   3 (3.0%) |  [7, 28] |                                 |         97\n      -------------------------------------------------------------------------\n\n# data_codebook efc, variable_label_width\n\n    Code\n      print(out, table_width = Inf)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings |   Values | Value Labels                    |          N\n      ---+----------+------------------------------+-------------+------------+----------+---------------------------------+-----------\n      1  | c12hour  | average number of hours of   | numeric     |   2 (2.0%) | [5, 168] |                                 |         98\n         |          | care per 
week                |             |            |          |                                 |           \n      ---+----------+------------------------------+-------------+------------+----------+---------------------------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%) |        1 | male                            | 46 (46.0%)\n         |          |                              |             |            |        2 | female                          | 54 (54.0%)\n      ---+----------+------------------------------+-------------+------------+----------+---------------------------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%) |        1 | independent                     |  2 ( 2.1%)\n         |          |                              |             |            |        2 | slightly dependent              |  4 ( 4.1%)\n         |          |                              |             |            |        3 | moderately dependent            | 28 (28.9%)\n         |          |                              |             |            |        4 | severely dependent              | 63 (64.9%)\n      ---+----------+------------------------------+-------------+------------+----------+---------------------------------+-----------\n      4  | c172code | carer's level of education   | numeric     | 10 (10.0%) |        1 | low level of education          |  8 ( 8.9%)\n         |          |                              |             |            |        2 | intermediate level of education | 66 (73.3%)\n         |          |                              |             |            |        3 | high level of education         | 16 (17.8%)\n      ---+----------+------------------------------+-------------+------------+----------+---------------------------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%) |  [7, 28] |                       
          |         97\n      ---------------------------------------------------------------------------------------------------------------------------------\n\n---\n\n    Code\n      print(out, table_width = \"auto\", remove_duplicates = FALSE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings\n      ---+----------+------------------------------+-------------+-----------\n      1  | c12hour  | average number of hours of   | numeric     |   2 (2.0%)\n         |          | care per week                |             |           \n      ---+----------+------------------------------+-------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%)\n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%)\n         |          |                              |             |           \n         |          |                              |             |           \n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      4  | c172code | carer's level of education   | numeric     | 10 (10.0%)\n         |          |                              |             |           \n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%)\n      -----------------------------------------------------------------------\n      \n      ID |   Values | Value Labels                    |          N\n      ---+----------+---------------------------------+-----------\n      1  | [5, 168] |                  
               |         98\n         |          |                                 |           \n      ---+----------+---------------------------------+-----------\n      2  |        1 | male                            | 46 (46.0%)\n         |        2 | female                          | 54 (54.0%)\n      ---+----------+---------------------------------+-----------\n      3  |        1 | independent                     |  2 ( 2.1%)\n         |        2 | slightly dependent              |  4 ( 4.1%)\n         |        3 | moderately dependent            | 28 (28.9%)\n         |        4 | severely dependent              | 63 (64.9%)\n      ---+----------+---------------------------------+-----------\n      4  |        1 | low level of education          |  8 ( 8.9%)\n         |        2 | intermediate level of education | 66 (73.3%)\n         |        3 | high level of education         | 16 (17.8%)\n      ---+----------+---------------------------------+-----------\n      5  |  [7, 28] |                                 |         97\n      ------------------------------------------------------------\n\n---\n\n    Code\n      print(out, table_width = \"auto\", remove_duplicates = TRUE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings\n      ---+----------+------------------------------+-------------+-----------\n      1  | c12hour  | average number of hours of   | numeric     |   2 (2.0%)\n         |          | care per week                |             |           \n      ---+----------+------------------------------+-------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%)\n      ---+----------+------------------------------+-------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%)\n      ---+----------+------------------------------+-------------+-----------\n      4  | 
c172code | carer's level of education   | numeric     | 10 (10.0%)\n      ---+----------+------------------------------+-------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%)\n      -----------------------------------------------------------------------\n      \n      ID |   Values | Value Labels                    |          N\n      ---+----------+---------------------------------+-----------\n      1  | [5, 168] |                                 |         98\n      ---+----------+---------------------------------+-----------\n      2  |        1 | male                            | 46 (46.0%)\n         |        2 | female                          | 54 (54.0%)\n      ---+----------+---------------------------------+-----------\n      3  |        1 | independent                     |  2 ( 2.1%)\n         |        2 | slightly dependent              |  4 ( 4.1%)\n         |        3 | moderately dependent            | 28 (28.9%)\n         |        4 | severely dependent              | 63 (64.9%)\n      ---+----------+---------------------------------+-----------\n      4  |        1 | low level of education          |  8 ( 8.9%)\n         |        2 | intermediate level of education | 66 (73.3%)\n         |        3 | high level of education         | 16 (17.8%)\n      ---+----------+---------------------------------+-----------\n      5  |  [7, 28] |                                 |         97\n      ------------------------------------------------------------\n\n# data_codebook efc, value_label_width\n\n    Code\n      print(out, table_width = Inf)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings |   Values | Value Labels     |          N\n      ---+----------+------------------------------+-------------+------------+----------+------------------+-----------\n      1  | c12hour  | average number of hours of   | numeric 
    |   2 (2.0%) | [5, 168] |                  |         98\n         |          | care per week                |             |            |          |                  |           \n      ---+----------+------------------------------+-------------+------------+----------+------------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%) |        1 | male             | 46 (46.0%)\n         |          |                              |             |            |        2 | female           | 54 (54.0%)\n      ---+----------+------------------------------+-------------+------------+----------+------------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%) |        1 | independent      |  2 ( 2.1%)\n         |          |                              |             |            |        2 | slightly...      |  4 ( 4.1%)\n         |          |                              |             |            |        3 | moderately...    | 28 (28.9%)\n         |          |                              |             |            |        4 | severely...      | 63 (64.9%)\n      ---+----------+------------------------------+-------------+------------+----------+------------------+-----------\n      4  | c172code | carer's level of education   | numeric     | 10 (10.0%) |        1 | low level of...  |  8 ( 8.9%)\n         |          |                              |             |            |        2 | intermediate...  | 66 (73.3%)\n         |          |                              |             |            |        3 | high level of... 
| 16 (17.8%)\n      ---+----------+------------------------------+-------------+------------+----------+------------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%) |  [7, 28] |                  |         97\n      ------------------------------------------------------------------------------------------------------------------\n\n---\n\n    Code\n      print(out, table_width = \"auto\", remove_duplicates = FALSE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings\n      ---+----------+------------------------------+-------------+-----------\n      1  | c12hour  | average number of hours of   | numeric     |   2 (2.0%)\n         |          | care per week                |             |           \n      ---+----------+------------------------------+-------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%)\n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%)\n         |          |                              |             |           \n         |          |                              |             |           \n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      4  | c172code | carer's level of education   | numeric     | 10 (10.0%)\n         |          |                              |             |           \n         |          |                              |             |           \n      ---+----------+------------------------------+-------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%)\n      
-----------------------------------------------------------------------\n      \n      ID |   Values | Value Labels     |          N\n      ---+----------+------------------+-----------\n      1  | [5, 168] |                  |         98\n         |          |                  |           \n      ---+----------+------------------+-----------\n      2  |        1 | male             | 46 (46.0%)\n         |        2 | female           | 54 (54.0%)\n      ---+----------+------------------+-----------\n      3  |        1 | independent      |  2 ( 2.1%)\n         |        2 | slightly...      |  4 ( 4.1%)\n         |        3 | moderately...    | 28 (28.9%)\n         |        4 | severely...      | 63 (64.9%)\n      ---+----------+------------------+-----------\n      4  |        1 | low level of...  |  8 ( 8.9%)\n         |        2 | intermediate...  | 66 (73.3%)\n         |        3 | high level of... | 16 (17.8%)\n      ---+----------+------------------+-----------\n      5  |  [7, 28] |                  |         97\n      ---------------------------------------------\n\n---\n\n    Code\n      print(out, table_width = \"auto\", remove_duplicates = TRUE)\n    Output\n      efc (100 rows and 5 variables, 5 shown)\n      \n      ID | Name     | Label                        | Type        |   Missings\n      ---+----------+------------------------------+-------------+-----------\n      1  | c12hour  | average number of hours of   | numeric     |   2 (2.0%)\n         |          | care per week                |             |           \n      ---+----------+------------------------------+-------------+-----------\n      2  | e16sex   | elder's gender               | numeric     |   0 (0.0%)\n      ---+----------+------------------------------+-------------+-----------\n      3  | e42dep   | elder's dependency           | categorical |   3 (3.0%)\n      ---+----------+------------------------------+-------------+-----------\n      4  | c172code | carer's level of 
education   | numeric     | 10 (10.0%)\n      ---+----------+------------------------------+-------------+-----------\n      5  | neg_c_7  | Negative impact with 7 items | numeric     |   3 (3.0%)\n      -----------------------------------------------------------------------\n      \n      ID |   Values | Value Labels     |          N\n      ---+----------+------------------+-----------\n      1  | [5, 168] |                  |         98\n      ---+----------+------------------+-----------\n      2  |        1 | male             | 46 (46.0%)\n         |        2 | female           | 54 (54.0%)\n      ---+----------+------------------+-----------\n      3  |        1 | independent      |  2 ( 2.1%)\n         |        2 | slightly...      |  4 ( 4.1%)\n         |        3 | moderately...    | 28 (28.9%)\n         |        4 | severely...      | 63 (64.9%)\n      ---+----------+------------------+-----------\n      4  |        1 | low level of...  |  8 ( 8.9%)\n         |        2 | intermediate...  | 66 (73.3%)\n         |        3 | high level of... | 16 (17.8%)\n      ---+----------+------------------+-----------\n      5  |  [7, 28] |                  |         97\n      ---------------------------------------------\n\n# data_codebook truncated data\n\n    Code\n      data_codebook(d, max_values = 5)\n    Output\n      d (100 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type      | Missings |  Values |        N\n      ---+------+-----------+----------+---------+---------\n      1  | a    | integer   | 0 (0.0%) | [1, 15] |      100\n      ---+------+-----------+----------+---------+---------\n      2  | b    | character | 0 (0.0%) |       a | 4 (4.0%)\n         |      |           |          |       b | 3 (3.0%)\n         |      |           |          |       c | 5 (5.0%)\n         |      |           |          |       d | 4 (4.0%)\n         |      |           |          |       e | 3 (3.0%)\n         |      |           |          |   (...) 
|         \n      -----------------------------------------------------\n\n# data_codebook mixed numeric lengths\n\n    Code\n      data_codebook(d)\n    Output\n      d (100 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type    | Missings |  Values |          N\n      ---+------+---------+----------+---------+-----------\n      1  | a    | integer | 0 (0.0%) |       1 | 28 (28.0%)\n         |      |         |          |       2 | 26 (26.0%)\n         |      |         |          |       3 | 29 (29.0%)\n         |      |         |          |       4 | 17 (17.0%)\n      ---+------+---------+----------+---------+-----------\n      2  | b    | integer | 0 (0.0%) | [5, 15] |        100\n      -----------------------------------------------------\n\n# data_codebook mixed range_at\n\n    Code\n      data_codebook(d, range_at = 3)\n    Output\n      d (100 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type    | Missings |  Values |   N\n      ---+------+---------+----------+---------+----\n      1  | a    | integer | 0 (0.0%) |  [1, 4] | 100\n      ---+------+---------+----------+---------+----\n      2  | b    | integer | 0 (0.0%) | [5, 15] | 100\n      ----------------------------------------------\n\n# data_codebook logicals\n\n    Code\n      data_codebook(d)\n    Output\n      d (100 rows and 3 variables, 3 shown)\n      \n      ID | Name | Type      | Missings |  Values |          N\n      ---+------+-----------+----------+---------+-----------\n      1  | a    | integer   | 0 (0.0%) | [1, 15] |        100\n      ---+------+-----------+----------+---------+-----------\n      2  | b    | character | 0 (0.0%) |       a | 26 (26.0%)\n         |      |           |          |       b | 38 (38.0%)\n         |      |           |          |       c | 36 (36.0%)\n      ---+------+-----------+----------+---------+-----------\n      3  | c    | logical   | 0 (0.0%) |   FALSE | 42 (42.0%)\n         |      |           |          |    TRUE | 58 (58.0%)\n    
  -------------------------------------------------------\n\n# data_codebook labelled data exceptions\n\n    Code\n      data_codebook(d)\n    Output\n      d (100 rows and 3 variables, 3 shown)\n      \n      ID | Name | Type    |   Missings | Values | Value Labels |          N\n      ---+------+---------+------------+--------+--------------+-----------\n      1  | f1   | integer | 17 (17.0%) |      1 | One          | 21 (25.3%)\n         |      |         |            |      2 | Two          | 20 (24.1%)\n         |      |         |            |      3 | Three        | 23 (27.7%)\n         |      |         |            |      5 | Five         | 19 (22.9%)\n      ---+------+---------+------------+--------+--------------+-----------\n      2  | f2   | integer |   0 (0.0%) |      1 | One          | 25 (25.0%)\n         |      |         |            |      2 | Two          | 20 (20.0%)\n         |      |         |            |      3 | Three        | 14 (14.0%)\n         |      |         |            |      4 | 4            | 17 (17.0%)\n         |      |         |            |      5 | Five         | 24 (24.0%)\n      ---+------+---------+------------+--------+--------------+-----------\n      3  | f3   | integer |   0 (0.0%) |      1 | One          | 21 (21.0%)\n         |      |         |            |      2 | Two          | 24 (24.0%)\n         |      |         |            |      3 | Three        | 16 (16.0%)\n         |      |         |            |      4 | Four         | 14 (14.0%)\n         |      |         |            |      5 | Five         | 25 (25.0%)\n      ---------------------------------------------------------------------\n\n# data_codebook labelled data factors\n\n    Code\n      data_codebook(d)\n    Output\n      d (100 rows and 3 variables, 3 shown)\n      \n      ID | Name | Type        | Missings | Values | Value Labels |          N\n      ---+------+-------------+----------+--------+--------------+-----------\n      1  | f1   | categorical | 
0 (0.0%) |      a | A            | 35 (35.0%)\n         |      |             |          |      b | Bee          | 32 (32.0%)\n         |      |             |          |      c | Cee          | 33 (33.0%)\n      ---+------+-------------+----------+--------+--------------+-----------\n      2  | f2   | categorical | 0 (0.0%) |      a | A            | 30 (30.0%)\n         |      |             |          |      b | Bee          | 38 (38.0%)\n         |      |             |          |      c | Cee          | 32 (32.0%)\n      ---+------+-------------+----------+--------+--------------+-----------\n      3  | f3   | categorical | 0 (0.0%) |      a | A            | 23 (23.0%)\n         |      |             |          |      b | Bee          | 28 (28.0%)\n         |      |             |          |      c | Cee          | 49 (49.0%)\n      -----------------------------------------------------------------------\n\n# data_codebook works with numbers < 1\n\n    Code\n      data_codebook(d)\n    Output\n      d (6 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type    | Missings | Values |         N\n      ---+------+---------+----------+--------+----------\n      1  | a    | numeric | 0 (0.0%) |      1 | 2 (33.3%)\n         |      |         |          |      2 | 2 (33.3%)\n         |      |         |          |      3 | 2 (33.3%)\n      ---+------+---------+----------+--------+----------\n      2  | b    | numeric | 0 (0.0%) |      0 | 3 (50.0%)\n         |      |         |          |      1 | 2 (33.3%)\n         |      |         |          |      2 | 1 (16.7%)\n      ---------------------------------------------------\n\n# data_codebook, big marks\n\n    Code\n      data_codebook(d)\n    Output\n      d (1,000,000 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type        | Missings | Values |               N\n      ---+------+-------------+----------+--------+----------------\n      1  | f1   | categorical | 0 (0.0%) |      a | 333,238 (33.3%)\n         
|      |             |          |      b | 332,910 (33.3%)\n         |      |             |          |      c | 333,852 (33.4%)\n      ---+------+-------------+----------+--------+----------------\n      2  | f2   | categorical | 0 (0.0%) |      1 | 333,285 (33.3%)\n         |      |             |          |      2 | 333,358 (33.3%)\n         |      |             |          |      3 | 333,357 (33.3%)\n      -------------------------------------------------------------\n\n# data_codebook, tagged NA\n\n    Code\n      data_codebook(data.frame(x))\n    Output\n      data.frame(x) (26 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    |   Missings | Values | Value Labels |         N\n      ---+------+---------+------------+--------+--------------+----------\n      1  | x    | numeric | 12 (46.2%) |      1 | Agreement    | 4 (15.4%)\n         |      |         |            |      2 | 2            | 4 (15.4%)\n         |      |         |            |      3 | 3            | 4 (15.4%)\n         |      |         |            |      4 | Disagreement | 2 ( 7.7%)\n         |      |         |            |  NA(a) | Refused      | 4 (15.4%)\n         |      |         |            |  NA(c) | First        | 5 (19.2%)\n         |      |         |            |  NA(z) | Not home     | 3 (11.5%)\n      --------------------------------------------------------------------\n\n---\n\n    Code\n      data_codebook(data.frame(x))\n    Output\n      data.frame(x) (23 rows and 1 variables, 1 shown)\n      \n      ID | Name | Type    |  Missings | Values | Value Labels |         N\n      ---+------+---------+-----------+--------+--------------+----------\n      1  | x    | numeric | 9 (39.1%) |      1 | Agreement    | 4 (17.4%)\n         |      |         |           |      2 | 2            | 4 (17.4%)\n         |      |         |           |      3 | 3            | 4 (17.4%)\n         |      |         |           |      4 | Disagreement | 2 ( 8.7%)\n         |      |         |    
       |  NA(a) | Refused      | 4 (17.4%)\n         |      |         |           |  NA(c) | First        | 5 (21.7%)\n      -------------------------------------------------------------------\n\n# data_codebook, negative label values #334\n\n    Code\n      data_codebook(data.frame(x1, x2))\n    Output\n      data.frame(x1, x2) (4 rows and 2 variables, 2 shown)\n      \n      ID | Name | Type    | Missings | Values | Value Labels |         N\n      ---+------+---------+----------+--------+--------------+----------\n      1  | x1   | integer | 0 (0.0%) |      1 | Agreement    | 1 (25.0%)\n         |      |         |          |      2 | 2            | 1 (25.0%)\n         |      |         |          |      3 | 3            | 1 (25.0%)\n         |      |         |          |      4 | Disagreement | 1 (25.0%)\n      ---+------+---------+----------+--------+--------------+----------\n      2  | x2   | numeric | 0 (0.0%) |     -9 | Missing      | 1 (25.0%)\n         |      |         |          |      1 | Agreement    | 1 (25.0%)\n         |      |         |          |      2 | 2            | 1 (25.0%)\n         |      |         |          |      3 | 3            | 1 (25.0%)\n      ------------------------------------------------------------------\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_modify.md",
    "content": "# data_modify message about recycling values\n\n    Code\n      head(data_modify(iris, Sepal.Width = 1))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1           1          1.4         0.2  setosa\n      2          4.9           1          1.4         0.2  setosa\n      3          4.7           1          1.3         0.2  setosa\n      4          4.6           1          1.5         0.2  setosa\n      5          5.0           1          1.4         0.2  setosa\n      6          5.4           1          1.7         0.4  setosa\n\n---\n\n    Code\n      head(data_modify(iris, Sepal.Width = 1:2))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1           1          1.4         0.2  setosa\n      2          4.9           2          1.4         0.2  setosa\n      3          4.7           1          1.3         0.2  setosa\n      4          4.6           2          1.5         0.2  setosa\n      5          5.0           1          1.4         0.2  setosa\n      6          5.4           2          1.7         0.4  setosa\n\n---\n\n    Code\n      head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1           1            1         0.2  setosa\n      2          4.9           1            1         0.2  setosa\n      3          4.7           1            1         0.2  setosa\n      4          4.6           1            1         0.2  setosa\n      5          5.0           1            1         0.2  setosa\n      6          5.4           1            1         0.4  setosa\n\n---\n\n    Code\n      head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1:2))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1           1            1         0.2  setosa\n      2          4.9           2            1   
      0.2  setosa\n      3          4.7           1            1         0.2  setosa\n      4          4.6           2            1         0.2  setosa\n      5          5.0           1            1         0.2  setosa\n      6          5.4           2            1         0.4  setosa\n\n---\n\n    Code\n      head(data_modify(iris, Petal.Length = 2, Sepal.Width = 2))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1           2            2         0.2  setosa\n      2          4.9           2            2         0.2  setosa\n      3          4.7           2            2         0.2  setosa\n      4          4.6           2            2         0.2  setosa\n      5          5.0           2            2         0.2  setosa\n      6          5.4           2            2         0.4  setosa\n\n# data_modify message about modified variables\n\n    Code\n      head(data_modify(iris, Sepal.Width = 2 * Sepal.Width))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1         7.0          1.4         0.2  setosa\n      2          4.9         6.0          1.4         0.2  setosa\n      3          4.7         6.4          1.3         0.2  setosa\n      4          4.6         6.2          1.5         0.2  setosa\n      5          5.0         7.2          1.4         0.2  setosa\n      6          5.4         7.8          1.7         0.4  setosa\n\n---\n\n    Code\n      head(data_modify(iris, Petal.Length = Sepal.Length, Sepal.Width = Petal.Width))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1          5.1         0.2          5.1         0.2  setosa\n      2          4.9         0.2          4.9         0.2  setosa\n      3          4.7         0.2          4.7         0.2  setosa\n      4          4.6         0.2          4.6         0.2  setosa\n      5          5.0         0.2          5.0         0.2  setosa\n      6          5.4  
       0.4          5.4         0.4  setosa\n\n# data_modify works with new expressions, different use cases same results\n\n    Code\n      print(head(out_complex))\n    Output\n        Species sepwid seplen half_petal new_var new_num new_var2 new_num2\n      1  setosa    7.0   25.5       0.70  string       1       ho        4\n      2  setosa    6.0   24.5       0.70  string       2       ho        5\n      3  setosa    6.4   23.5       0.65  string       3       ho        6\n      4  setosa    6.2   23.0       0.75  string       4       ho        4\n      5  setosa    7.2   25.0       0.70  string       5       ho        5\n      6  setosa    7.8   27.0       0.85  string       1       ho        6\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_partition.md",
    "content": "# data_partition works as expected\n\n    Code\n      data_partition(letters, seed = 123)\n    Output\n      $p_0.7\n         data .row_id\n      1     c       3\n      2     e       5\n      3     h       8\n      4     i       9\n      5     j      10\n      6     k      11\n      7     l      12\n      8     m      13\n      9     n      14\n      10    o      15\n      11    p      16\n      12    r      18\n      13    s      19\n      14    t      20\n      15    u      21\n      16    w      23\n      17    x      24\n      18    y      25\n      \n      $test\n        data .row_id\n      1    a       1\n      2    b       2\n      3    d       4\n      4    f       6\n      5    g       7\n      6    q      17\n      7    v      22\n      8    z      26\n      \n\n---\n\n    Code\n      str(data_partition(iris, proportion = 0.7, seed = 123))\n    Output\n      List of 2\n       $ p_0.7:'data.frame':\t105 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:105] 4.6 5.4 4.6 5 4.4 4.9 4.8 4.8 4.3 5.8 ...\n        ..$ Sepal.Width : num [1:105] 3.1 3.9 3.4 3.4 2.9 3.1 3.4 3 3 4 ...\n        ..$ Petal.Length: num [1:105] 1.5 1.7 1.4 1.5 1.4 1.5 1.6 1.4 1.1 1.2 ...\n        ..$ Petal.Width : num [1:105] 0.2 0.4 0.3 0.2 0.2 0.1 0.2 0.1 0.1 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:105] 4 6 7 8 9 10 12 13 14 15 ...\n       $ test :'data.frame':\t45 obs. 
of  6 variables:\n        ..$ Sepal.Length: num [1:45] 5.1 4.9 4.7 5 5.4 5.1 5.7 5.2 5.2 5.2 ...\n        ..$ Sepal.Width : num [1:45] 3.5 3 3.2 3.6 3.7 3.5 3.8 3.5 3.4 4.1 ...\n        ..$ Petal.Length: num [1:45] 1.4 1.4 1.3 1.4 1.5 1.4 1.7 1.5 1.4 1.5 ...\n        ..$ Petal.Width : num [1:45] 0.2 0.2 0.2 0.2 0.2 0.3 0.3 0.2 0.2 0.1 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:45] 1 2 3 5 11 18 19 28 29 33 ...\n\n---\n\n    Code\n      str(data_partition(iris, proportion = c(0.2, 0.5), seed = 123))\n    Output\n      List of 3\n       $ p_0.2:'data.frame':\t30 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:30] 4.6 4.4 4.3 4.6 5 5 5.4 5 4.4 5 ...\n        ..$ Sepal.Width : num [1:30] 3.4 2.9 3 3.6 3 3.4 3.4 3.5 3.2 3.3 ...\n        ..$ Petal.Length: num [1:30] 1.4 1.4 1.1 1 1.6 1.6 1.5 1.3 1.3 1.4 ...\n        ..$ Petal.Width : num [1:30] 0.3 0.2 0.1 0.2 0.2 0.4 0.4 0.3 0.2 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:30] 7 9 14 23 26 27 32 41 43 50 ...\n       $ p_0.5:'data.frame':\t75 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:75] 4.6 5.4 5 4.9 4.8 5.8 5.7 5.4 5.1 5.7 ...\n        ..$ Sepal.Width : num [1:75] 3.1 3.9 3.4 3.1 3.4 4 4.4 3.9 3.5 3.8 ...\n        ..$ Petal.Length: num [1:75] 1.5 1.7 1.5 1.5 1.6 1.2 1.5 1.3 1.4 1.7 ...\n        ..$ Petal.Width : num [1:75] 0.2 0.4 0.2 0.1 0.2 0.2 0.4 0.4 0.3 0.3 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:75] 4 6 8 10 12 15 16 17 18 19 ...\n       $ test :'data.frame':\t45 obs. 
of  6 variables:\n        ..$ Sepal.Length: num [1:45] 5.1 4.9 4.7 5 5.4 4.8 5.4 5.1 5.2 4.9 ...\n        ..$ Sepal.Width : num [1:45] 3.5 3 3.2 3.6 3.7 3 3.4 3.7 4.1 3.1 ...\n        ..$ Petal.Length: num [1:45] 1.4 1.4 1.3 1.4 1.5 1.4 1.7 1.5 1.5 1.5 ...\n        ..$ Petal.Width : num [1:45] 0.2 0.2 0.2 0.2 0.2 0.1 0.2 0.4 0.1 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:45] 1 2 3 5 11 13 21 22 33 35 ...\n\n---\n\n    Code\n      str(data_partition(iris, proportion = 0.7, by = \"Species\", seed = 123))\n    Output\n      List of 2\n       $ p_0.7:'data.frame':\t105 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:105] 4.7 4.6 5 4.6 5 4.4 4.9 5.4 4.8 4.8 ...\n        ..$ Sepal.Width : num [1:105] 3.2 3.1 3.6 3.4 3.4 2.9 3.1 3.7 3.4 3 ...\n        ..$ Petal.Length: num [1:105] 1.3 1.5 1.4 1.4 1.5 1.4 1.5 1.5 1.6 1.4 ...\n        ..$ Petal.Width : num [1:105] 0.2 0.2 0.2 0.3 0.2 0.2 0.1 0.2 0.2 0.1 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:105] 3 4 5 7 8 9 10 11 12 13 ...\n       $ test :'data.frame':\t45 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:45] 5.1 4.9 5.4 5.7 5.1 5.1 5.1 4.6 5.5 4.9 ...\n        ..$ Sepal.Width : num [1:45] 3.5 3 3.9 4.4 3.5 3.8 3.7 3.6 4.2 3.1 ...\n        ..$ Petal.Length: num [1:45] 1.4 1.4 1.7 1.5 1.4 1.5 1.5 1 1.4 1.5 ...\n        ..$ Petal.Width : num [1:45] 0.2 0.2 0.4 0.4 0.3 0.3 0.4 0.2 0.2 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:45] 1 2 6 16 18 20 22 23 34 35 ...\n\n---\n\n    Code\n      str(data_partition(iris, proportion = c(0.2, 0.5), by = \"Species\", seed = 123))\n    Output\n      List of 3\n       $ p_0.2:'data.frame':\t30 obs. 
of  6 variables:\n        ..$ Sepal.Length: num [1:30] 4.7 4.3 5.8 4.8 5 4.8 5.5 4.5 4.4 4.6 ...\n        ..$ Sepal.Width : num [1:30] 3.2 3 4 3.4 3 3.1 3.5 2.3 3.2 3.2 ...\n        ..$ Petal.Length: num [1:30] 1.3 1.1 1.2 1.9 1.6 1.6 1.3 1.3 1.3 1.4 ...\n        ..$ Petal.Width : num [1:30] 0.2 0.1 0.2 0.2 0.2 0.2 0.2 0.3 0.2 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:30] 3 14 15 25 26 31 37 42 43 48 ...\n       $ p_0.5:'data.frame':\t75 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:75] 5 5.4 5 4.4 4.9 5.4 4.8 4.8 5.7 5.4 ...\n        ..$ Sepal.Width : num [1:75] 3.6 3.9 3.4 2.9 3.1 3.7 3.4 3 4.4 3.9 ...\n        ..$ Petal.Length: num [1:75] 1.4 1.7 1.5 1.4 1.5 1.5 1.6 1.4 1.5 1.3 ...\n        ..$ Petal.Width : num [1:75] 0.2 0.4 0.2 0.2 0.1 0.2 0.2 0.1 0.4 0.4 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:75] 5 6 8 9 10 11 12 13 16 17 ...\n       $ test :'data.frame':\t45 obs. of  6 variables:\n        ..$ Sepal.Length: num [1:45] 5.1 4.9 4.6 4.6 5.7 5.4 4.6 5 5.2 4.7 ...\n        ..$ Sepal.Width : num [1:45] 3.5 3 3.1 3.4 3.8 3.4 3.6 3.4 3.5 3.2 ...\n        ..$ Petal.Length: num [1:45] 1.4 1.4 1.5 1.4 1.7 1.7 1 1.6 1.5 1.6 ...\n        ..$ Petal.Width : num [1:45] 0.2 0.2 0.2 0.3 0.3 0.2 0.2 0.4 0.2 0.2 ...\n        ..$ Species     : Factor w/ 3 levels \"setosa\",\"versicolor\",..: 1 1 1 1 1 1 1 1 1 1 ...\n        ..$ .row_id     : int [1:45] 1 2 4 7 19 21 23 27 28 30 ...\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_peek.md",
    "content": "# data_peek snapshots look as expected\n\n    Code\n      data_peek(iris)\n    Output\n      Data frame with 150 rows and 5 variables\n      \n      Variable     | Type    | Values                                        \n      -----------------------------------------------------------------------\n      Sepal.Length | numeric | 5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, ...  \n      Sepal.Width  | numeric | 3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, ...\n      Petal.Length | numeric | 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, ...   \n      Petal.Width  | numeric | 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, ...   \n      Species      | factor  | setosa, setosa, setosa, setosa, setosa, ...   \n\n---\n\n    Code\n      data_peek(iris, select = 1:3)\n    Output\n      Data frame with 150 rows and 5 variables\n      \n      Variable     | Type    | Values                                        \n      -----------------------------------------------------------------------\n      Sepal.Length | numeric | 5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, ...  \n      Sepal.Width  | numeric | 3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, ...\n      Petal.Length | numeric | 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, ...   \n\n---\n\n    Code\n      data_peek(iris, width = 130)\n    Output\n      Data frame with 150 rows and 5 variables\n      \n      Variable     | Type    | Values                                                                                                  \n      ---------------------------------------------------------------------------------------------------------------------------------\n      Sepal.Length | numeric | 5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, ...\n      Sepal.Width  | numeric | 3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3, 3, 4, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, ...    
\n      Petal.Length | numeric | 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, ... \n      Petal.Width  | numeric | 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, ... \n      Species      | factor  | setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa, setosa, ...     \n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_read.md",
    "content": "# data_read, convert many labels correctly\n\n    Code\n      data_tabulate(d$selv1)\n    Output\n      d$selv1 <categorical>\n      # total N=2413 valid N=2413\n      \n      Value                                              |   N | Raw % | Valid % | Cumulative %\n      ---------------------------------------------------+-----+-------+---------+-------------\n      Vignette 1 weiblich (Gülsen E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |         6.22\n      Vignette 2 weiblich (Gülsen E. Anwältin B)         | 150 |  6.22 |    6.22 |        12.43\n      Vignette 3 weiblich (Monika E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |        18.65\n      Vignette 4 weiblich (Monika E. Anwältin B)         | 151 |  6.26 |    6.26 |        24.91\n      Vignette 5 männlich (Hasan E. Reinigungskraft B)   | 151 |  6.26 |    6.26 |        31.16\n      Vignette 6 männlich (Hasan E. Anwalt B)            | 153 |  6.34 |    6.34 |        37.51\n      Vignette 7 männlich (Martin E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |        43.72\n      Vignette 8 männlich (Martin E. Anwalt B)           | 150 |  6.22 |    6.22 |        49.94\n      Vignette 9 weiblich (Gülsen E. Reinigungskraft E)  | 151 |  6.26 |    6.26 |        56.20\n      Vignette 10 weiblich (Gülsen E. Anwältin E)        | 150 |  6.22 |    6.22 |        62.41\n      Vignette 11 weiblich (Monika E. Reinigungskraft E) | 150 |  6.22 |    6.22 |        68.63\n      Vignette 12 weiblich (Monika E. Anwältin E)        | 151 |  6.26 |    6.26 |        74.89\n      Vignette 13 männlich (Hasan E. Reinigungskraft E)  | 155 |  6.42 |    6.42 |        81.31\n      Vignette 14 männlich (Hasan E. Anwalt E)           | 150 |  6.22 |    6.22 |        87.53\n      Vignette 15 männlich (Martin E. Reinigungskraft E) | 150 |  6.22 |    6.22 |        93.74\n      Vignette 16 männlich (Martin E. 
Anwalt E)          | 151 |  6.26 |    6.26 |       100.00\n      <NA>                                               |   0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(d$c12)\n    Output\n      Sind oder waren Sie schon einmal selbst von solchen Beschwerden betroffen? (d$c12) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value        |    N | Raw % | Valid % | Cumulative %\n      -------------+------+-------+---------+-------------\n      ja           |  786 | 32.57 |   32.57 |        32.57\n      nein         | 1616 | 66.97 |   66.97 |        99.54\n      keine Angabe |   11 |  0.46 |    0.46 |       100.00\n      <NA>         |    0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(d$c12a)\n    Output\n      Haben Sie deswegen Behandlung(en) in Anspruch genommen? (d$c12a) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value        |    N | Raw % | Valid % | Cumulative %\n      -------------+------+-------+---------+-------------\n      Filter       | 1627 | 67.43 |   67.43 |        67.43\n      ja           |  500 | 20.72 |   20.72 |        88.15\n      nein         |  285 | 11.81 |   11.81 |        99.96\n      keine Angabe |    1 |  0.04 |    0.04 |       100.00\n      <NA>         |    0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(d$c12c)\n    Output\n      Wie sehr haben diese Behandlung(en) Ihre Beeinträchtigung durch die Beschwerden verbessert? 
(d$c12c) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value                     |    N | Raw % | Valid % | Cumulative %\n      --------------------------+------+-------+---------+-------------\n      Filter                    | 1913 | 79.28 |   79.28 |        79.28\n      0 = keine                 |   34 |  1.41 |    1.41 |        80.69\n      1                         |    2 |  0.08 |    0.08 |        80.77\n      2                         |   11 |  0.46 |    0.46 |        81.23\n      3                         |   14 |  0.58 |    0.58 |        81.81\n      4                         |   19 |  0.79 |    0.79 |        82.59\n      5                         |   61 |  2.53 |    2.53 |        85.12\n      6                         |   42 |  1.74 |    1.74 |        86.86\n      7                         |   63 |  2.61 |    2.61 |        89.47\n      8                         |   97 |  4.02 |    4.02 |        93.49\n      9                         |   53 |  2.20 |    2.20 |        95.69\n      10 = sehr starke          |   99 |  4.10 |    4.10 |        99.79\n      weiß nicht / keine Angabe |    5 |  0.21 |    0.21 |       100.00\n      <NA>                      |    0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      table(d$selv1)\n    Output\n      \n        1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 \n      150 150 150 151 151 153 150 150 151 150 150 151 155 150 150 151 \n\n---\n\n    Code\n      table(d$c12)\n    Output\n      \n         1    2   99 \n       786 1616   11 \n\n---\n\n    Code\n      table(d$c12a)\n    Output\n      \n        -2    1    2   99 \n      1627  500  285    1 \n\n---\n\n    Code\n      table(d$c12c)\n    Output\n      \n        -2    0    1    2    3    4    5    6    7    8    9   10   99 \n      1913   34    2   11   14   19   61   42   63   97   53   99    5 \n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_rescale.md",
    "content": "# rescale works as expected\n\n    Code\n      head(rescale(iris, to = c(0, 1)))\n    Message\n      Variables of class `factor` can't be rescaled and remain unchanged.\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1   0.22222222   0.6250000   0.06779661  0.04166667  setosa\n      2   0.16666667   0.4166667   0.06779661  0.04166667  setosa\n      3   0.11111111   0.5000000   0.05084746  0.04166667  setosa\n      4   0.08333333   0.4583333   0.08474576  0.04166667  setosa\n      5   0.19444444   0.6666667   0.06779661  0.04166667  setosa\n      6   0.30555556   0.7916667   0.11864407  0.12500000  setosa\n\n---\n\n    Code\n      head(rescale(iris, to = c(0, 1), select = \"Sepal.Length\"))\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1   0.22222222         3.5          1.4         0.2  setosa\n      2   0.16666667         3.0          1.4         0.2  setosa\n      3   0.11111111         3.2          1.3         0.2  setosa\n      4   0.08333333         3.1          1.5         0.2  setosa\n      5   0.19444444         3.6          1.4         0.2  setosa\n      6   0.30555556         3.9          1.7         0.4  setosa\n\n---\n\n    Code\n      head(rescale(iris, to = list(Sepal.Length = c(0, 1), Petal.Length = c(-1, 0))))\n    Message\n      Variables of class `factor` can't be rescaled and remain unchanged.\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n      1   0.22222222         3.5   -0.9322034         0.2  setosa\n      2   0.16666667         3.0   -0.9322034         0.2  setosa\n      3   0.11111111         3.2   -0.9491525         0.2  setosa\n      4   0.08333333         3.1   -0.9152542         0.2  setosa\n      5   0.19444444         3.6   -0.9322034         0.2  setosa\n      6   0.30555556         3.9   -0.8813559         0.4  setosa\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_seek.md",
    "content": "# data_seek - print\n\n    Code\n      data_seek(iris, \"Length\")\n    Output\n      index |       column |       labels\n      -----------------------------------\n          1 | Sepal.Length | Sepal.Length\n          3 | Petal.Length | Petal.Length\n\n---\n\n    Code\n      data_seek(iris, \"abc\")\n    Output\n      No matches found.\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_separate.md",
    "content": "# data_separate: multiple columns\n\n    Code\n      out\n    Output\n        x_1 x_2  x_3  y_1  y_2  y_3\n      1   1   a    6    m    n   99\n      2   2   b    7   77    f    g\n      3   3   c    8   44    9 <NA>\n      4   5   j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n        x_1 x_2  x_3  y_1  y_2  y_3\n      1   1   a    6    m    n   99\n      2   2   b  7 d   77    f    g\n      3   3   c    8   44    9 <NA>\n      4   5   j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n        x_A x_B  x_C  y_A  y_B  y_C\n      1   1   a    6    m    n   99\n      2   2   b  7 d   77    f    g\n      3   3   c    8   44    9 <NA>\n      4   5   j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n              x      y x_A x_B  x_C  y_A  y_B  y_C\n      1   1.a.6 m.n.99   1   a    6    m    n   99\n      2 2.b.7.d 77.f.g   2   b  7 d   77    f    g\n      3   3.c.8   44.9   3   c    8   44    9 <NA>\n      4     5.j   <NA>   5   j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n        x_1 x_2  x_3  y_1  y_2  y_3\n      1   1   a    6    m    n   99\n      2   b   7    d   77    f    g\n      3   3   c    8   44    9 <NA>\n      4   5   j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n              x      y x_A x_B x_C  y_A  y_B  y_C\n      1   1.a.6 m.n.99   1   a   6    m    n   99\n      2 2.b.7.d 77.f.g   2   b 7 d   77    f    g\n      3   3.c.8   44.9   3   c   8   44    9    9\n      4     5.j   <NA>   5   j   j <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n              x      y   A   B   C\n      1   1.a.6 m.n.99  1m  an 699\n      2 2.b.7.d 77.f.g 277  bf 7dg\n      3   3.c.8   44.9 344  c9  89\n      4     5.j   <NA> 5NA jNA jNA\n\n---\n\n    Code\n      out\n    Output\n              x      y   A   B    C\n      1   1.a.6 m.n.99  1m  an  699\n      2 2.b.7.d 77.f.g 277  bf   7g\n      3   3.c.8   44.9 344  c9  8NA\n      4     5.j   <NA> 5NA jNA 
NANA\n\n---\n\n    Code\n      out\n    Output\n        x_1 x_2 x_3  y_1  y_2  y_3\n      1   1   a   6    m    n   99\n      2   2   b   7   77    f    g\n      3   3   c   8   44   44    9\n      4   5   5   j <NA> <NA> <NA>\n\n# data_separate: multiple columns, different lengths\n\n    Code\n      out\n    Output\n        A B    C   EE   FF   GG\n      1 1 a    6    m    n   99\n      2 2 b    7   77    f    g\n      3 3 c    8   44    9 <NA>\n      4 5 j <NA> <NA> <NA> <NA>\n\n---\n\n    Code\n      out\n    Output\n        A B    C   EE   FF   GG   HH\n      1 1 a    6    m    n   99   22\n      2 2 b    7   77    f    g   34\n      3 3 c    8   44    9 <NA> <NA>\n      4 5 j <NA> <NA> <NA> <NA> <NA>\n\n# data_separate: fail if invalid column selected\n\n    Code\n      data_separate(d_sep, guess_columns = \"mode\", select = NULL)\n    Message\n      Column `x` had different number of values after splitting. Variable was\n        split into 3 columns.\n      `x` returned more columns than expected after splitting. Right-most\n        columns have been dropped.\n      `x`returned fewer columns than expected after splitting. Right-most\n        columns were filled with `NA`.\n      Column `y` had different number of values after splitting. Variable was\n        split into 3 columns.\n      `y`returned fewer columns than expected after splitting. Right-most\n        columns were filled with `NA`.\n    Output\n        x_1 x_2  x_3  y_1  y_2  y_3\n      1   1   a    6    m    n   99\n      2   2   b    7   77    f    g\n      3   3   c    8   44    9 <NA>\n      4   5   j <NA> <NA> <NA> <NA>\n\n# data_separate: numeric column\n\n    Code\n      out\n    Output\n              y x_1 x_2 x_3 x_4\n      V1 m.n.99  15 435 352   3\n      V2 77.f.g  53 554 353   2\n      V3   44.9  12 342 422    \n      V4   <NA>  15 454 334 535\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_summary.md",
    "content": "# data_summary, print\n\n    Code\n      print(out)\n    Output\n      am | gear |    MW |   SD\n      ------------------------\n       0 |    3 | 16.11 | 3.37\n       0 |    4 | 21.05 | 3.07\n       1 |    4 | 26.27 | 5.41\n       1 |    5 | 21.38 | 6.66\n\n# data_summary, with NA\n\n    Code\n      print(out)\n    Output\n      c172code |    MW\n      ----------------\n             1 | 87.12\n             2 | 94.05\n             3 | 75.00\n          <NA> | 47.80\n\n---\n\n    Code\n      print(out)\n    Output\n      c172code |    MW\n      ----------------\n             1 | 87.12\n             2 | 94.05\n             3 | 75.00\n\n---\n\n    Code\n      print(out)\n    Output\n      e42dep | c172code |     MW\n      --------------------------\n      1      |        2 |  17.00\n      2      |        2 |  34.25\n      3      |        1 |  39.50\n      3      |        2 |  52.44\n      3      |        3 |  52.00\n      3      |     <NA> |  84.00\n      4      |        1 | 134.75\n      4      |        2 | 119.26\n      4      |        3 |  88.80\n      4      |     <NA> |  43.29\n      <NA>   |        2 |   <NA>\n      <NA>   |     <NA> |   7.00\n\n# data_summary, bayestestR::ci\n\n    Code\n      out\n    Output\n      am | gear | mean_value |         95% CI\n      ---------------------------------------\n       0 |    3 |      16.11 | [10.40, 21.46]\n       0 |    4 |      21.05 | [17.91, 24.28]\n       1 |    4 |      26.27 | [21.00, 33.64]\n       1 |    5 |      21.38 | [15.08, 29.96]\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_tabulate.md",
    "content": "# data_tabulate, tinytable\n\n    Code\n      display(data_tabulate(efc$c172code), format = \"tt\")\n    Output\n      \n      +-------+----+-------+---------+--------------+\n      | Value | N  | Raw % | Valid % | Cumulative % |\n      +=======+====+=======+=========+==============+\n      | 1     | 8  | 8     | 8.89    | 8.89         |\n      +-------+----+-------+---------+--------------+\n      | 2     | 66 | 66    | 73.33   | 82.22        |\n      +-------+----+-------+---------+--------------+\n      | 3     | 16 | 16    | 17.78   | 100.00       |\n      +-------+----+-------+---------+--------------+\n      | (NA)  | 10 | 10    | (NA)    | (NA)         |\n      +=======+====+=======+=========+==============+\n      | total N=100 valid N=90                      |\n      +=======+====+=======+=========+==============+\n      Table: carer's level of education (efc$c172code) (numeric) \n\n---\n\n    Code\n      display(data_tabulate(efc, \"c172code\"), format = \"tt\")\n    Output\n      \n      +-------+----+-------+---------+--------------+\n      | Value | N  | Raw % | Valid % | Cumulative % |\n      +=======+====+=======+=========+==============+\n      | 1     | 8  | 8     | 8.89    | 8.89         |\n      +-------+----+-------+---------+--------------+\n      | 2     | 66 | 66    | 73.33   | 82.22        |\n      +-------+----+-------+---------+--------------+\n      | 3     | 16 | 16    | 17.78   | 100.00       |\n      +-------+----+-------+---------+--------------+\n      | (NA)  | 10 | 10    | (NA)    | (NA)         |\n      +=======+====+=======+=========+==============+\n      | total N=100 valid N=90                      |\n      +=======+====+=======+=========+==============+\n      Table: carer's level of education (c172code) (numeric) \n\n# data_tabulate, weights\n\n    Code\n      print(data_tabulate(efc$e42dep, weights = efc$weights))\n    Output\n      elder's dependency (efc$e42dep) <categorical>\n      # total N=105 valid 
N=100 (weighted)\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     |  3 |  2.86 |       3 |            3\n      2     |  4 |  3.81 |       4 |            7\n      3     | 26 | 24.76 |      26 |           33\n      4     | 67 | 63.81 |      67 |          100\n      <NA>  |  5 |  4.76 |    <NA> |         <NA>\n\n---\n\n    Code\n      print_md(data_tabulate(efc$e42dep, weights = efc$weights))\n    Output\n      \n      \n      Table: elder's dependency (efc$e42dep) (categorical)\n      \n      |Value |  N| Raw %| Valid %| Cumulative %|\n      |:-----|--:|-----:|-------:|------------:|\n      |1     |  3|  2.86|       3|            3|\n      |2     |  4|  3.81|       4|            7|\n      |3     | 26| 24.76|      26|           33|\n      |4     | 67| 63.81|      67|          100|\n      |(NA)  |  5|  4.76|    (NA)|         (NA)|\n      total N=105 valid N=100 (weighted)\n      \n      \n\n---\n\n    Code\n      display(data_tabulate(efc$e42dep, weights = efc$weights))\n    Output\n      \n      \n      Table: elder's dependency (efc$e42dep) (categorical)\n      \n      |Value |  N| Raw %| Valid %| Cumulative %|\n      |:-----|--:|-----:|-------:|------------:|\n      |1     |  3|  2.86|       3|            3|\n      |2     |  4|  3.81|       4|            7|\n      |3     | 26| 24.76|      26|           33|\n      |4     | 67| 63.81|      67|          100|\n      |(NA)  |  5|  4.76|    (NA)|         (NA)|\n      total N=105 valid N=100 (weighted)\n      \n      \n\n---\n\n    Code\n      print(data_tabulate(efc, c(\"e42dep\", \"e16sex\"), collapse = TRUE, weights = efc$\n        weights))\n    Output\n      # Frequency Table (weighted)\n      \n      Variable | Value |  N | Raw % | Valid % | Cumulative %\n      ---------+-------+----+-------+---------+-------------\n      e42dep   |     1 |  3 |  2.86 |       3 |            3\n               |     2 |  4 |  3.81 |       4 |            7\n       
        |     3 | 26 | 24.76 |      26 |           33\n               |     4 | 67 | 63.81 |      67 |          100\n               |  <NA> |  5 |  4.76 |    <NA> |         <NA>\n      ---------+-------+----+-------+---------+-------------\n      e16sex   |     1 | 50 | 47.62 |   47.62 |        47.62\n               |     2 | 55 | 52.38 |   52.38 |       100.00\n               |  <NA> |  0 |  0.00 |    <NA> |         <NA>\n      ------------------------------------------------------\n\n---\n\n    Code\n      print_md(data_tabulate(efc, c(\"e42dep\", \"e16sex\"), weights = efc$weights))\n    Output\n      \n      \n      Table: Frequency Table (weighted)\n      \n      |Variable | Value|  N| Raw %| Valid %| Cumulative %|\n      |:--------|-----:|--:|-----:|-------:|------------:|\n      |e42dep   |     1|  3|  2.86|       3|            3|\n      |         |     2|  4|  3.81|       4|            7|\n      |         |     3| 26| 24.76|      26|           33|\n      |         |     4| 67| 63.81|      67|          100|\n      |         |  (NA)|  5|  4.76|    (NA)|         (NA)|\n      |         |      |   |      |        |             |\n      |e16sex   |     1| 50| 47.62|   47.62|        47.62|\n      |         |     2| 55| 52.38|   52.38|       100.00|\n      |         |  (NA)|  0|  0.00|    (NA)|         (NA)|\n      |         |      |   |      |        |             |\n\n---\n\n    Code\n      display(data_tabulate(efc, c(\"e42dep\", \"e16sex\"), weights = efc$weights))\n    Output\n      \n      \n      Table: Frequency Table (weighted)\n      \n      |Variable | Value|  N| Raw %| Valid %| Cumulative %|\n      |:--------|-----:|--:|-----:|-------:|------------:|\n      |e42dep   |     1|  3|  2.86|       3|            3|\n      |         |     2|  4|  3.81|       4|            7|\n      |         |     3| 26| 24.76|      26|           33|\n      |         |     4| 67| 63.81|      67|          100|\n      |         |  (NA)|  5|  4.76|    (NA)|         (NA)|\n      | 
        |      |   |      |        |             |\n      |e16sex   |     1| 50| 47.62|   47.62|        47.62|\n      |         |     2| 55| 52.38|   52.38|       100.00|\n      |         |  (NA)|  0|  0.00|    (NA)|         (NA)|\n      |         |      |   |      |        |             |\n\n# data_tabulate print\n\n    Code\n      data_tabulate(efc$e42dep)\n    Output\n      elder's dependency (efc$e42dep) <categorical>\n      # total N=100 valid N=97\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     |  2 |     2 |    2.06 |         2.06\n      2     |  4 |     4 |    4.12 |         6.19\n      3     | 28 |    28 |   28.87 |        35.05\n      4     | 63 |    63 |   64.95 |       100.00\n      <NA>  |  3 |     3 |    <NA> |         <NA>\n\n# data_tabulate print multiple\n\n    Code\n      data_tabulate(efc, c(\"c172code\", \"e16sex\"))\n    Output\n      carer's level of education (c172code) <numeric>\n      # total N=100 valid N=90\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     |  8 |     8 |    8.89 |         8.89\n      2     | 66 |    66 |   73.33 |        82.22\n      3     | 16 |    16 |   17.78 |       100.00\n      <NA>  | 10 |    10 |    <NA> |         <NA>\n      \n      elder's gender (e16sex) <numeric>\n      # total N=100 valid N=100\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     | 46 |    46 |      46 |           46\n      2     | 54 |    54 |      54 |          100\n      <NA>  |  0 |     0 |    <NA> |         <NA>\n\n# data_tabulate big numbers\n\n    Code\n      data_tabulate(x)\n    Output\n      x <integer>\n      # total N=10,000,000 valid N=10,000,000\n      \n      Value |         N | Raw % | Valid % | Cumulative %\n      ------+-----------+-------+---------+-------------\n      1     | 1,998,318 | 19.98 |   19.98 |     
   19.98\n      2     | 1,998,338 | 19.98 |   19.98 |        39.97\n      3     | 2,001,814 | 20.02 |   20.02 |        59.98\n      4     | 1,999,423 | 19.99 |   19.99 |        79.98\n      5     | 2,002,107 | 20.02 |   20.02 |       100.00\n      <NA>  |         0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      print(data_tabulate(x), big_mark = \"-\")\n    Output\n      x <integer>\n      # total N=10-000-000 valid N=10-000-000\n      \n      Value |         N | Raw % | Valid % | Cumulative %\n      ------+-----------+-------+---------+-------------\n      1     | 1-998-318 | 19.98 |   19.98 |        19.98\n      2     | 1-998-338 | 19.98 |   19.98 |        39.97\n      3     | 2-001-814 | 20.02 |   20.02 |        59.98\n      4     | 1-999-423 | 19.99 |   19.99 |        79.98\n      5     | 2-002-107 | 20.02 |   20.02 |       100.00\n      <NA>  |         0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      print(data_tabulate(x), big_mark = \"\")\n    Output\n      x <integer>\n      # total N=10000000 valid N=10000000\n      \n      Value |       N | Raw % | Valid % | Cumulative %\n      ------+---------+-------+---------+-------------\n      1     | 1998318 | 19.98 |   19.98 |        19.98\n      2     | 1998338 | 19.98 |   19.98 |        39.97\n      3     | 2001814 | 20.02 |   20.02 |        59.98\n      4     | 1999423 | 19.99 |   19.99 |        79.98\n      5     | 2002107 | 20.02 |   20.02 |       100.00\n      <NA>  |       0 |  0.00 |    <NA> |         <NA>\n\n# data_tabulate print multiple, collapse\n\n    Code\n      data_tabulate(efc, c(\"c172code\", \"e16sex\"), collapse = TRUE)\n    Output\n      # Frequency Table\n      \n      Variable | Value |  N | Raw % | Valid % | Cumulative %\n      ---------+-------+----+-------+---------+-------------\n      c172code |     1 |  8 |     8 |    8.89 |         8.89\n               |     2 | 66 |    66 |   73.33 |        82.22\n               |     3 | 16 |    16 |   17.78 |       100.00\n  
             |  <NA> | 10 |    10 |    <NA> |         <NA>\n      ---------+-------+----+-------+---------+-------------\n      e16sex   |     1 | 46 |    46 |      46 |           46\n               |     2 | 54 |    54 |      54 |          100\n               |  <NA> |  0 |     0 |    <NA> |         <NA>\n      ------------------------------------------------------\n\n# data_tabulate print grouped data\n\n    Code\n      data_tabulate(poorman::group_by(efc, e16sex), \"c172code\")\n    Output\n      carer's level of education (c172code) <numeric>\n      Grouped by e16sex (1)\n      # total N=46 valid N=41\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     |  5 | 10.87 |   12.20 |        12.20\n      2     | 32 | 69.57 |   78.05 |        90.24\n      3     |  4 |  8.70 |    9.76 |       100.00\n      <NA>  |  5 | 10.87 |    <NA> |         <NA>\n      \n      carer's level of education (c172code) <numeric>\n      Grouped by e16sex (2)\n      # total N=54 valid N=49\n      \n      Value |  N | Raw % | Valid % | Cumulative %\n      ------+----+-------+---------+-------------\n      1     |  3 |  5.56 |    6.12 |         6.12\n      2     | 34 | 62.96 |   69.39 |        75.51\n      3     | 12 | 22.22 |   24.49 |       100.00\n      <NA>  |  5 |  9.26 |    <NA> |         <NA>\n\n# data_tabulate print, collapse groups\n\n    Code\n      data_tabulate(poorman::group_by(efc, e16sex), \"c172code\", collapse = TRUE)\n    Output\n      # Frequency Table\n      \n      Variable |      Group | Value |  N | Raw % | Valid % | Cumulative %\n      ---------+------------+-------+----+-------+---------+-------------\n      c172code | e16sex (1) |     1 |  5 | 10.87 |   12.20 |        12.20\n               |            |     2 | 32 | 69.57 |   78.05 |        90.24\n               |            |     3 |  4 |  8.70 |    9.76 |       100.00\n               |            |  <NA> |  5 | 10.87 |    <NA> |         <NA>\n  
    ---------+------------+-------+----+-------+---------+-------------\n      c172code | e16sex (2) |     1 |  3 |  5.56 |    6.12 |         6.12\n               |            |     2 | 34 | 62.96 |   69.39 |        75.51\n               |            |     3 | 12 | 22.22 |   24.49 |       100.00\n               |            |  <NA> |  5 |  9.26 |    <NA> |         <NA>\n      -------------------------------------------------------------------\n\n# data_tabulate print, collapse groups, drop levels\n\n    Code\n      data_tabulate(poorman::group_by(efc, e16sex), \"e42dep\", collapse = TRUE,\n      drop_levels = TRUE)\n    Output\n      # Frequency Table\n      \n      Variable |      Group | Value |  N | Raw % | Valid % | Cumulative %\n      ---------+------------+-------+----+-------+---------+-------------\n      e42dep   | e16sex (1) |     1 |  2 |  4.35 |    4.44 |         4.44\n               |            |     2 |  2 |  4.35 |    4.44 |         8.89\n               |            |     3 |  8 | 17.39 |   17.78 |        26.67\n               |            |     4 | 33 | 71.74 |   73.33 |       100.00\n               |            |  <NA> |  1 |  2.17 |    <NA> |         <NA>\n      ---------+------------+-------+----+-------+---------+-------------\n      e42dep   | e16sex (2) |     2 |  2 |  3.70 |    3.85 |         3.85\n               |            |     3 | 20 | 37.04 |   38.46 |        42.31\n               |            |     4 | 30 | 55.56 |   57.69 |       100.00\n               |            |  <NA> |  2 |  3.70 |    <NA> |         <NA>\n      -------------------------------------------------------------------\n\n# data_tabulate, cross tables\n\n    Code\n      print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\"))\n    Output\n      efc$c172code |       male |     female |     <NA> | Total\n      -------------+------------+------------+----------+------\n      1            |  5  (5.0%) |  2  (2.0%) | 1 (1.0%) |     8\n      2            | 
31 (31.0%) | 33 (33.0%) | 2 (2.0%) |    66\n      3            |  4  (4.0%) | 11 (11.0%) | 1 (1.0%) |    16\n      <NA>         |  5  (5.0%) |  4  (4.0%) | 1 (1.0%) |    10\n      -------------+------------+------------+----------+------\n      Total        |         45 |         50 |        5 |   100\n\n---\n\n    Code\n      print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE))\n    Output\n      efc$c172code |       male |     female | Total\n      -------------+------------+------------+------\n      1            |  5  (5.8%) |  2  (2.3%) |     7\n      2            | 31 (36.0%) | 33 (38.4%) |    64\n      3            |  4  (4.7%) | 11 (12.8%) |    15\n      -------------+------------+------------+------\n      Total        |         40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      weights = efc$weights))\n    Output\n      efc$c172code |       male |     female |     <NA> | Total\n      -------------+------------+------------+----------+------\n      1            |  5  (4.8%) |  3  (2.9%) | 2 (1.9%) |    10\n      2            | 32 (30.5%) | 32 (30.5%) | 3 (2.9%) |    67\n      3            |  3  (2.9%) | 11 (10.5%) | 1 (1.0%) |    15\n      <NA>         |  8  (7.6%) |  5  (4.8%) | 1 (1.0%) |    14\n      -------------+------------+------------+----------+------\n      Total        |         48 |         51 |        7 |   105\n\n---\n\n    Code\n      print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE, weights = efc$weights))\n    Output\n      efc$c172code |       male |     female | Total\n      -------------+------------+------------+------\n      1            |  5  (5.8%) |  3  (3.5%) |     8\n      2            | 32 (37.2%) | 32 (37.2%) |    64\n      3            |  3  (3.5%) | 11 (12.8%) |    14\n      -------------+------------+------------+------\n      Total        |         
40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\"))\n    Output\n      c172code |       male |     female |      <NA> | Total\n      ---------+------------+------------+-----------+------\n      1        |  5 (62.5%) |  2 (25.0%) | 1 (12.5%) |     8\n      2        | 31 (47.0%) | 33 (50.0%) | 2  (3.0%) |    66\n      3        |  4 (25.0%) | 11 (68.8%) | 1  (6.2%) |    16\n      <NA>     |  5 (50.0%) |  4 (40.0%) | 1 (10.0%) |    10\n      ---------+------------+------------+-----------+------\n      Total    |         45 |         50 |         5 |   100\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\",\n      remove_na = TRUE))\n    Output\n      c172code |       male |     female | Total\n      ---------+------------+------------+------\n      1        |  5 (71.4%) |  2 (28.6%) |     7\n      2        | 31 (48.4%) | 33 (51.6%) |    64\n      3        |  4 (26.7%) | 11 (73.3%) |    15\n      ---------+------------+------------+------\n      Total    |         40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\",\n      weights = efc$weights))\n    Output\n      c172code |       male |     female |      <NA> | Total\n      ---------+------------+------------+-----------+------\n      1        |  5 (50.0%) |  3 (30.0%) | 2 (20.0%) |    10\n      2        | 32 (47.8%) | 32 (47.8%) | 3  (4.5%) |    67\n      3        |  3 (20.0%) | 11 (73.3%) | 1  (6.7%) |    15\n      <NA>     |  8 (57.1%) |  5 (35.7%) | 1  (7.1%) |    14\n      ---------+------------+------------+-----------+------\n      Total    |         48 |         51 |         7 |   105\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\",\n      remove_na = TRUE, weights = efc$weights))\n    Output\n      c172code |       male |     female | Total\n      
---------+------------+------------+------\n      1        |  5 (62.5%) |  3 (37.5%) |     8\n      2        | 32 (50.0%) | 32 (50.0%) |    64\n      3        |  3 (21.4%) | 11 (78.6%) |    14\n      ---------+------------+------------+------\n      Total    |         40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\"))\n    Output\n      c172code |       male |     female |      <NA> | Total\n      ---------+------------+------------+-----------+------\n      1        |  5 (11.1%) |  2  (4.0%) | 1 (20.0%) |     8\n      2        | 31 (68.9%) | 33 (66.0%) | 2 (40.0%) |    66\n      3        |  4  (8.9%) | 11 (22.0%) | 1 (20.0%) |    16\n      <NA>     |  5 (11.1%) |  4  (8.0%) | 1 (20.0%) |    10\n      ---------+------------+------------+-----------+------\n      Total    |         45 |         50 |         5 |   100\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\",\n        remove_na = TRUE))\n    Output\n      c172code |       male |     female | Total\n      ---------+------------+------------+------\n      1        |  5 (12.5%) |  2  (4.3%) |     7\n      2        | 31 (77.5%) | 33 (71.7%) |    64\n      3        |  4 (10.0%) | 11 (23.9%) |    15\n      ---------+------------+------------+------\n      Total    |         40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\",\n        weights = \"weights\"))\n    Output\n      c172code |       male |     female |      <NA> | Total\n      ---------+------------+------------+-----------+------\n      1        |  5 (10.4%) |  3  (5.9%) | 2 (28.6%) |    10\n      2        | 32 (66.7%) | 32 (62.7%) | 3 (42.9%) |    67\n      3        |  3  (6.2%) | 11 (21.6%) | 1 (14.3%) |    15\n      <NA>     |  8 (16.7%) |  5  (9.8%) | 1 (14.3%) |    14\n      ---------+------------+------------+-----------+------\n    
  Total    |         48 |         51 |         7 |   105\n\n---\n\n    Code\n      print(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\",\n        remove_na = TRUE, weights = \"weights\"))\n    Output\n      c172code |       male |     female | Total\n      ---------+------------+------------+------\n      1        |  5 (12.5%) |  3  (6.5%) |     8\n      2        | 32 (80.0%) | 32 (69.6%) |    64\n      3        |  3  (7.5%) | 11 (23.9%) |    14\n      ---------+------------+------------+------\n      Total    |         40 |         46 |    86\n\n---\n\n    Code\n      print(data_tabulate(efc, c(\"c172code\", \"e42dep\"), by = \"e16sex\", proportions = \"row\"))\n    Output\n      Variable | Value |        male |     female |      <NA> | Total\n      ---------+-------+-------------+------------+-----------+------\n      c172code |     1 |   5 (62.5%) |  2 (25.0%) | 1 (12.5%) |     8\n      c172code |     2 |  31 (47.0%) | 33 (50.0%) | 2  (3.0%) |    66\n      c172code |     3 |   4 (25.0%) | 11 (68.8%) | 1  (6.2%) |    16\n      c172code |  <NA> |   5 (50.0%) |  4 (40.0%) | 1 (10.0%) |    10\n      e42dep   |     1 |  2 (100.0%) |  0  (0.0%) |  0 (0.0%) |     2\n      e42dep   |     2 |  2  (50.0%) |  2 (50.0%) |  0 (0.0%) |     4\n      e42dep   |     3 |  8  (28.6%) | 18 (64.3%) |  2 (7.1%) |    28\n      e42dep   |     4 | 32  (50.8%) | 28 (44.4%) |  3 (4.8%) |    63\n      e42dep   |  <NA> |  1  (33.3%) |  2 (66.7%) |  0 (0.0%) |     3\n\n# data_tabulate, cross tables, tinytable\n\n    Code\n      display(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\"),\n      format = \"tt\")\n    Output\n      \n      +--------------+------------+------------+----------+-------+\n      | efc$c172code | male       | female     | (NA)     | Total |\n      +==============+============+============+==========+=======+\n      | 1            | 5  (5.0%)  | 2  (2.0%)  | 1 (1.0%) | 8     |\n      
+--------------+------------+------------+----------+-------+\n      | 2            | 31 (31.0%) | 33 (33.0%) | 2 (2.0%) | 66    |\n      +--------------+------------+------------+----------+-------+\n      | 3            | 4  (4.0%)  | 11 (11.0%) | 1 (1.0%) | 16    |\n      +--------------+------------+------------+----------+-------+\n      | (NA)         | 5  (5.0%)  | 4  (4.0%)  | 1 (1.0%) | 10    |\n      +--------------+------------+------------+----------+-------+\n      | Total        | 45         | 50         | 5        | 100   |\n      +--------------+------------+------------+----------+-------+ \n\n---\n\n    Code\n      display(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE), format = \"tt\")\n    Output\n      \n      +--------------+------------+------------+-------+\n      | efc$c172code | male       | female     | Total |\n      +==============+============+============+=======+\n      | 1            | 5  (5.8%)  | 2  (2.3%)  | 7     |\n      +--------------+------------+------------+-------+\n      | 2            | 31 (36.0%) | 33 (38.4%) | 64    |\n      +--------------+------------+------------+-------+\n      | 3            | 4  (4.7%)  | 11 (12.8%) | 15    |\n      +--------------+------------+------------+-------+\n      | Total        | 40         | 46         | 86    |\n      +--------------+------------+------------+-------+ \n\n---\n\n    Code\n      display(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      weights = efc$weights), format = \"tt\")\n    Output\n      \n      +--------------+------------+------------+----------+-------+\n      | efc$c172code | male       | female     | (NA)     | Total |\n      +==============+============+============+==========+=======+\n      | 1            | 5  (4.8%)  | 3  (2.9%)  | 2 (1.9%) | 10    |\n      +--------------+------------+------------+----------+-------+\n      | 2            | 32 (30.5%) | 32 (30.5%) | 3 (2.9%) | 67 
   |\n      +--------------+------------+------------+----------+-------+\n      | 3            | 3  (2.9%)  | 11 (10.5%) | 1 (1.0%) | 15    |\n      +--------------+------------+------------+----------+-------+\n      | (NA)         | 8  (7.6%)  | 5  (4.8%)  | 1 (1.0%) | 14    |\n      +--------------+------------+------------+----------+-------+\n      | Total        | 48         | 51         | 7        | 105   |\n      +--------------+------------+------------+----------+-------+ \n\n---\n\n    Code\n      display(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE, weights = efc$weights), format = \"tt\")\n    Output\n      \n      +--------------+------------+------------+-------+\n      | efc$c172code | male       | female     | Total |\n      +==============+============+============+=======+\n      | 1            | 5  (5.8%)  | 3  (3.5%)  | 8     |\n      +--------------+------------+------------+-------+\n      | 2            | 32 (37.2%) | 32 (37.2%) | 64    |\n      +--------------+------------+------------+-------+\n      | 3            | 3  (3.5%)  | 11 (12.8%) | 14    |\n      +--------------+------------+------------+-------+\n      | Total        | 40         | 46         | 86    |\n      +--------------+------------+------------+-------+ \n\n---\n\n    Code\n      display(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\"),\n      format = \"tt\")\n    Output\n      \n      +----------+------------+------------+-----------+-------+\n      | c172code | male       | female     | (NA)      | Total |\n      +==========+============+============+===========+=======+\n      | 1        | 5 (62.5%)  | 2 (25.0%)  | 1 (12.5%) | 8     |\n      +----------+------------+------------+-----------+-------+\n      | 2        | 31 (47.0%) | 33 (50.0%) | 2  (3.0%) | 66    |\n      +----------+------------+------------+-----------+-------+\n      | 3        | 4 (25.0%)  | 11 (68.8%) | 1  (6.2%) | 16    |\n  
    +----------+------------+------------+-----------+-------+\n      | (NA)     | 5 (50.0%)  | 4 (40.0%)  | 1 (10.0%) | 10    |\n      +----------+------------+------------+-----------+-------+\n      | Total    | 45         | 50         | 5         | 100   |\n      +----------+------------+------------+-----------+-------+ \n\n---\n\n    Code\n      display(data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\",\n      remove_na = TRUE, weights = efc$weights), format = \"tt\")\n    Output\n      \n      +----------+------------+------------+-------+\n      | c172code | male       | female     | Total |\n      +==========+============+============+=======+\n      | 1        | 5 (62.5%)  | 3 (37.5%)  | 8     |\n      +----------+------------+------------+-------+\n      | 2        | 32 (50.0%) | 32 (50.0%) | 64    |\n      +----------+------------+------------+-------+\n      | 3        | 3 (21.4%)  | 11 (78.6%) | 14    |\n      +----------+------------+------------+-------+\n      | Total    | 40         | 46         | 86    |\n      +----------+------------+------------+-------+ \n\n# data_tabulate, cross tables, grouped df\n\n    Code\n      print(data_tabulate(grp, \"c172code\", by = \"e16sex\", proportions = \"row\"))\n    Output\n      Grouped by e42dep (1)\n      \n      Variable | Value |       male | female |     <NA> | Total\n      ---------+-------+------------+--------+----------+------\n      c172code |     2 | 2 (100.0%) |   <NA> | 0 (0.0%) |     2\n               |  <NA> | 0   (0.0%) |   <NA> | 0 (0.0%) |     0\n      \n      Grouped by e42dep (2)\n      \n      Variable | Value |      male |    female |     <NA> | Total\n      ---------+-------+-----------+-----------+----------+------\n      c172code |     2 | 2 (50.0%) | 2 (50.0%) | 0 (0.0%) |     4\n               |  <NA> | 0  (0.0%) | 0  (0.0%) | 0 (0.0%) |     0\n      \n      Grouped by e42dep (3)\n      \n      Variable | Value |      male |     female |      <NA> | Total\n   
   ---------+-------+-----------+------------+-----------+------\n      c172code |     1 | 2 (50.0%) |  2 (50.0%) | 0  (0.0%) |     4\n               |     2 | 4 (25.0%) | 11 (68.8%) | 1  (6.2%) |    16\n               |     3 | 1 (16.7%) |  5 (83.3%) | 0  (0.0%) |     6\n               |  <NA> | 1 (50.0%) |  0  (0.0%) | 1 (50.0%) |     2\n      \n      Grouped by e42dep (4)\n      \n      Variable | Value |       male |     female |      <NA> | Total\n      ---------+-------+------------+------------+-----------+------\n      c172code |     1 |  3 (75.0%) |  0  (0.0%) | 1 (25.0%) |     4\n               |     2 | 23 (54.8%) | 18 (42.9%) | 1  (2.4%) |    42\n               |     3 |  3 (30.0%) |  6 (60.0%) | 1 (10.0%) |    10\n               |  <NA> |  3 (42.9%) |  4 (57.1%) | 0  (0.0%) |     7\n      \n      Grouped by e42dep (NA)\n      \n      Variable | Value |       male |     female |     <NA> | Total\n      ---------+-------+------------+------------+----------+------\n      c172code |     2 | 0   (0.0%) | 2 (100.0%) | 0 (0.0%) |     2\n               |  <NA> | 1 (100.0%) | 0   (0.0%) | 0 (0.0%) |     1\n\n# data_tabulate, cross tables, print/format works\n\n    Code\n      print(x)\n    Output\n      Variable | Value |  3 | 4 | 5 | <NA> | Total\n      ---------+-------+----+---+---+------+------\n      cyl      |     4 |  1 | 8 | 2 |    0 |    11\n      cyl      |     6 |  2 | 4 | 1 |    0 |     7\n      cyl      |     8 | 12 | 0 | 2 |    0 |    14\n      cyl      |  <NA> |  0 | 0 | 0 |    0 |     0\n      am       |     0 | 15 | 4 | 0 |    0 |    19\n      am       |     1 |  0 | 8 | 5 |    0 |    13\n      am       |  <NA> |  0 | 0 | 0 |    0 |     0\n\n# data_tabulate, cross tables, markdown\n\n    Code\n      print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\"))\n    Output\n      \n      \n      |efc$c172code |       male|     female|    (NA) | Total|\n      |:------------|----------:|----------:|:--------|-----:|\n      |1     
       |  5  (5.0%)|  2  (2.0%)|1 (1.0%) |     8|\n      |2            | 31 (31.0%)| 33 (33.0%)|2 (2.0%) |    66|\n      |3            |  4  (4.0%)| 11 (11.0%)|1 (1.0%) |    16|\n      |(NA)         |  5  (5.0%)|  4  (4.0%)|1 (1.0%) |    10|\n      |             |           |           |         |      |\n      |Total        |         45|         50|       5 |   100|\n\n---\n\n    Code\n      print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE))\n    Output\n      \n      \n      |efc$c172code |       male|     female| Total|\n      |:------------|----------:|----------:|-----:|\n      |1            |  5  (5.8%)|  2  (2.3%)|     7|\n      |2            | 31 (36.0%)| 33 (38.4%)|    64|\n      |3            |  4  (4.7%)| 11 (12.8%)|    15|\n      |             |           |           |      |\n      |Total        |         40|         46|    86|\n\n---\n\n    Code\n      print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      weights = efc$weights))\n    Output\n      \n      \n      |efc$c172code |       male|     female|    (NA) | Total|\n      |:------------|----------:|----------:|:--------|-----:|\n      |1            |  5  (4.8%)|  3  (2.9%)|2 (1.9%) |    10|\n      |2            | 32 (30.5%)| 32 (30.5%)|3 (2.9%) |    67|\n      |3            |  3  (2.9%)| 11 (10.5%)|1 (1.0%) |    15|\n      |(NA)         |  8  (7.6%)|  5  (4.8%)|1 (1.0%) |    14|\n      |             |           |           |         |      |\n      |Total        |         48|         51|       7 |   105|\n\n---\n\n    Code\n      print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\",\n      remove_na = TRUE, weights = efc$weights))\n    Output\n      \n      \n      |efc$c172code |       male|     female| Total|\n      |:------------|----------:|----------:|-----:|\n      |1            |  5  (5.8%)|  3  (3.5%)|     8|\n      |2            | 32 (37.2%)| 32 (37.2%)|    64|\n      |3            
|  3  (3.5%)| 11 (12.8%)|    14|\n      |             |           |           |      |\n      |Total        |         40|         46|    86|\n\n---\n\n    Code\n      print_md(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\",\n        remove_na = TRUE, weights = \"weights\"))\n    Output\n      \n      \n      |c172code |       male|     female| Total|\n      |:--------|----------:|----------:|-----:|\n      |1        |  5 (12.5%)|  3  (6.5%)|     8|\n      |2        | 32 (80.0%)| 32 (69.6%)|    64|\n      |3        |  3  (7.5%)| 11 (23.9%)|    14|\n      |         |           |           |      |\n      |Total    |         40|         46|    86|\n\n---\n\n    Code\n      print_md(data_tabulate(efc, c(\"c172code\", \"e42dep\"), by = \"e16sex\",\n      proportions = \"row\"))\n    Output\n      \n      \n      |Variable | Value|        male|     female|     (NA) | Total|\n      |:--------|-----:|-----------:|----------:|:---------|-----:|\n      |c172code |     1|   5 (62.5%)|  2 (25.0%)|1 (12.5%) |     8|\n      |c172code |     2|  31 (47.0%)| 33 (50.0%)|2  (3.0%) |    66|\n      |c172code |     3|   4 (25.0%)| 11 (68.8%)|1  (6.2%) |    16|\n      |c172code |  (NA)|   5 (50.0%)|  4 (40.0%)|1 (10.0%) |    10|\n      |e42dep   |     1|  2 (100.0%)|  0  (0.0%)| 0 (0.0%) |     2|\n      |e42dep   |     2|  2  (50.0%)|  2 (50.0%)| 0 (0.0%) |     4|\n      |e42dep   |     3|  8  (28.6%)| 18 (64.3%)| 2 (7.1%) |    28|\n      |e42dep   |     4| 32  (50.8%)| 28 (44.4%)| 3 (4.8%) |    63|\n      |e42dep   |  (NA)|  1  (33.3%)|  2 (66.7%)| 0 (0.0%) |     3|\n\n---\n\n    Code\n      display(data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\",\n        remove_na = TRUE, weights = \"weights\"))\n    Output\n      \n      \n      |c172code |       male|     female| Total|\n      |:--------|----------:|----------:|-----:|\n      |1        |  5 (12.5%)|  3  (6.5%)|     8|\n      |2        | 32 (80.0%)| 32 (69.6%)|    64|\n      |3   
     |  3  (7.5%)| 11 (23.9%)|    14|\n      |         |           |           |      |\n      |Total    |         40|         46|    86|\n\n---\n\n    Code\n      display(data_tabulate(efc, c(\"c172code\", \"e42dep\"), by = \"e16sex\", proportions = \"row\"))\n    Output\n      \n      \n      |Variable | Value|        male|     female|     (NA) | Total|\n      |:--------|-----:|-----------:|----------:|:---------|-----:|\n      |c172code |     1|   5 (62.5%)|  2 (25.0%)|1 (12.5%) |     8|\n      |c172code |     2|  31 (47.0%)| 33 (50.0%)|2  (3.0%) |    66|\n      |c172code |     3|   4 (25.0%)| 11 (68.8%)|1  (6.2%) |    16|\n      |c172code |  (NA)|   5 (50.0%)|  4 (40.0%)|1 (10.0%) |    10|\n      |e42dep   |     1|  2 (100.0%)|  0  (0.0%)| 0 (0.0%) |     2|\n      |e42dep   |     2|  2  (50.0%)|  2 (50.0%)| 0 (0.0%) |     4|\n      |e42dep   |     3|  8  (28.6%)| 18 (64.3%)| 2 (7.1%) |    28|\n      |e42dep   |     4| 32  (50.8%)| 28 (44.4%)| 3 (4.8%) |    63|\n      |e42dep   |  (NA)|  1  (33.3%)|  2 (66.7%)| 0 (0.0%) |     3|\n\n# data_tabulate, correct 0% for proportions\n\n    Code\n      print(out[[1]])\n    Output\n      c172code |       male |     female |     <NA> | Total\n      ---------+------------+------------+----------+------\n      1        |  5 (10.9%) |  3  (5.6%) | 0 (0.0%) |     8\n      2        | 32 (69.6%) | 34 (63.0%) | 0 (0.0%) |    66\n      3        |  4  (8.7%) | 12 (22.2%) | 0 (0.0%) |    16\n      <NA>     |  5 (10.9%) |  5  (9.3%) | 0 (0.0%) |    10\n      ---------+------------+------------+----------+------\n      Total    |         46 |         54 |        0 |   100\n\n# data_tabulate, table methods\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n       4  6  8 \n      11  7 14 \n      \n\n---\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n       4  6  8 \n      11  7 14 \n      \n\n---\n\n    Code\n      as.table(x, remove_na = FALSE)\n    Output\n      [[1]]\n         4    6    8 <NA> \n        11    7   14  
  0 \n      \n\n---\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n       4  6  8 \n      11  7 14 \n      \n      [[2]]\n       3  4  5 \n      15 12  5 \n      \n\n---\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n         3  4  5\n      4  1  8  2\n      6  2  4  1\n      8 12  0  2\n      \n\n---\n\n    Code\n      as.table(x, simplify = TRUE)\n    Output\n         3  4  5\n      4  1  8  2\n      6  2  4  1\n      8 12  0  2\n\n---\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n         3  4  5\n      4  1  8  2\n      6  2  4  1\n      8 12  0  2\n      \n\n---\n\n    Code\n      as.table(x, simplify = TRUE)\n    Output\n         3  4  5\n      4  1  8  2\n      6  2  4  1\n      8 12  0  2\n\n---\n\n    Code\n      as.table(x)\n    Output\n      [[1]]\n         3  4  5\n      0 15  4  0\n      1  0  8  5\n      \n      [[2]]\n         3  4  5\n      4  1  8  2\n      6  2  4  1\n      8 12  0  2\n      \n\n---\n\n    Code\n      as.table(x)\n    Output\n      $`am (0)`\n         3  4\n      4  1  2\n      6  2  2\n      8 12  0\n      \n      $`am (1)`\n        4 5\n      4 6 2\n      6 2 1\n      8 0 2\n      \n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_to_factor.md",
    "content": "# data_read, convert many labels correctly\n\n    Code\n      data_tabulate(to_factor(d$selv1))\n    Output\n      to_factor(d$selv1) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value                                              |   N | Raw % | Valid % | Cumulative %\n      ---------------------------------------------------+-----+-------+---------+-------------\n      Vignette 1 weiblich (Gülsen E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |         6.22\n      Vignette 2 weiblich (Gülsen E. Anwältin B)         | 150 |  6.22 |    6.22 |        12.43\n      Vignette 3 weiblich (Monika E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |        18.65\n      Vignette 4 weiblich (Monika E. Anwältin B)         | 151 |  6.26 |    6.26 |        24.91\n      Vignette 5 männlich (Hasan E. Reinigungskraft B)   | 151 |  6.26 |    6.26 |        31.16\n      Vignette 6 männlich (Hasan E. Anwalt B)            | 153 |  6.34 |    6.34 |        37.51\n      Vignette 7 männlich (Martin E. Reinigungskraft B)  | 150 |  6.22 |    6.22 |        43.72\n      Vignette 8 männlich (Martin E. Anwalt B)           | 150 |  6.22 |    6.22 |        49.94\n      Vignette 9 weiblich (Gülsen E. Reinigungskraft E)  | 151 |  6.26 |    6.26 |        56.20\n      Vignette 10 weiblich (Gülsen E. Anwältin E)        | 150 |  6.22 |    6.22 |        62.41\n      Vignette 11 weiblich (Monika E. Reinigungskraft E) | 150 |  6.22 |    6.22 |        68.63\n      Vignette 12 weiblich (Monika E. Anwältin E)        | 151 |  6.26 |    6.26 |        74.89\n      Vignette 13 männlich (Hasan E. Reinigungskraft E)  | 155 |  6.42 |    6.42 |        81.31\n      Vignette 14 männlich (Hasan E. Anwalt E)           | 150 |  6.22 |    6.22 |        87.53\n      Vignette 15 männlich (Martin E. Reinigungskraft E) | 150 |  6.22 |    6.22 |        93.74\n      Vignette 16 männlich (Martin E. 
Anwalt E)          | 151 |  6.26 |    6.26 |       100.00\n      <NA>                                               |   0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(to_factor(d$c12))\n    Output\n      Sind oder waren Sie schon einmal selbst von solchen Beschwerden betroffen? (to_factor(d$c12)) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value        |    N | Raw % | Valid % | Cumulative %\n      -------------+------+-------+---------+-------------\n      ja           |  786 | 32.57 |   32.57 |        32.57\n      nein         | 1616 | 66.97 |   66.97 |        99.54\n      keine Angabe |   11 |  0.46 |    0.46 |       100.00\n      <NA>         |    0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(to_factor(d$c12a))\n    Output\n      Haben Sie deswegen Behandlung(en) in Anspruch genommen? (to_factor(d$c12a)) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value        |    N | Raw % | Valid % | Cumulative %\n      -------------+------+-------+---------+-------------\n      Filter       | 1627 | 67.43 |   67.43 |        67.43\n      ja           |  500 | 20.72 |   20.72 |        88.15\n      nein         |  285 | 11.81 |   11.81 |        99.96\n      keine Angabe |    1 |  0.04 |    0.04 |       100.00\n      <NA>         |    0 |  0.00 |    <NA> |         <NA>\n\n---\n\n    Code\n      data_tabulate(to_factor(d$c12c))\n    Output\n      Wie sehr haben diese Behandlung(en) Ihre Beeinträchtigung durch die Beschwerden verbessert? 
(to_factor(d$c12c)) <categorical>\n      # total N=2413 valid N=2413\n      \n      Value                     |    N | Raw % | Valid % | Cumulative %\n      --------------------------+------+-------+---------+-------------\n      Filter                    | 1913 | 79.28 |   79.28 |        79.28\n      0 = keine                 |   34 |  1.41 |    1.41 |        80.69\n      1                         |    2 |  0.08 |    0.08 |        80.77\n      2                         |   11 |  0.46 |    0.46 |        81.23\n      3                         |   14 |  0.58 |    0.58 |        81.81\n      4                         |   19 |  0.79 |    0.79 |        82.59\n      5                         |   61 |  2.53 |    2.53 |        85.12\n      6                         |   42 |  1.74 |    1.74 |        86.86\n      7                         |   63 |  2.61 |    2.61 |        89.47\n      8                         |   97 |  4.02 |    4.02 |        93.49\n      9                         |   53 |  2.20 |    2.20 |        95.69\n      10 = sehr starke          |   99 |  4.10 |    4.10 |        99.79\n      weiß nicht / keine Angabe |    5 |  0.21 |    0.21 |       100.00\n      <NA>                      |    0 |  0.00 |    <NA> |         <NA>\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_to_long.md",
    "content": "# data_to_long works - complex dataset\n\n    Code\n      str(long)\n    Output\n      'data.frame':\t70000 obs. of  6 variables:\n       $ gender     : int  1 1 1 1 1 1 1 1 1 1 ...\n       $ education  : int  NA NA NA NA NA NA NA NA NA NA ...\n       $ age        : int  16 16 16 16 16 16 16 16 16 16 ...\n       $ Participant: num  61617 61617 61617 61617 61617 ...\n       $ Item       : chr  \"A1\" \"A2\" \"A3\" \"A4\" ...\n       $ Score      : int  2 4 3 4 4 2 3 3 4 4 ...\n\n# don't convert factors to integer\n\n    Code\n      print(mtcars_long)\n    Output\n         cyl  hp drat    wt vs am gear carb am_f cyl_f id    g  value\n      1    4  93 3.85 2.320  1  1    4    1    1     4  3  mpg  22.80\n      2    4  93 3.85 2.320  1  1    4    1    1     4  3 qsec  18.61\n      3    4  93 3.85 2.320  1  1    4    1    1     4  3 disp 108.00\n      4    8 245 3.21 3.570  0  0    3    4    0     8  7  mpg  14.30\n      5    8 245 3.21 3.570  0  0    3    4    0     8  7 qsec  15.84\n      6    8 245 3.21 3.570  0  0    3    4    0     8  7 disp 360.00\n      7    4  66 4.08 2.200  1  1    4    1    1     4 10  mpg  32.40\n      8    4  66 4.08 2.200  1  1    4    1    1     4 10 qsec  19.47\n      9    4  66 4.08 2.200  1  1    4    1    1     4 10 disp  78.70\n      10   8 264 4.22 3.170  0  1    5    4    1     8 11  mpg  15.80\n      11   8 264 4.22 3.170  0  1    5    4    1     8 11 qsec  14.50\n      12   8 264 4.22 3.170  0  1    5    4    1     8 11 disp 351.00\n      13   6 110 3.08 3.215  1  0    3    1    0     6  4  mpg  21.40\n      14   6 110 3.08 3.215  1  0    3    1    0     6  4 qsec  19.44\n      15   6 110 3.08 3.215  1  0    3    1    0     6  4 disp 258.00\n      16   8 175 3.15 3.440  0  0    3    2    0     8  5  mpg  18.70\n      17   8 175 3.15 3.440  0  0    3    2    0     8  5 qsec  17.02\n      18   8 175 3.15 3.440  0  0    3    2    0     8  5 disp 360.00\n      19   8 335 3.54 3.570  0  1    5    8    1     8 12  mpg  
15.00\n      20   8 335 3.54 3.570  0  1    5    8    1     8 12 qsec  14.60\n      21   8 335 3.54 3.570  0  1    5    8    1     8 12 disp 301.00\n      22   6 110 3.90 2.620  0  1    4    4    1     6  1  mpg  21.00\n      23   6 110 3.90 2.620  0  1    4    4    1     6  1 qsec  16.46\n      24   6 110 3.90 2.620  0  1    4    4    1     6  1 disp 160.00\n      25   6 110 3.90 2.875  0  1    4    4    1     6  2  mpg  21.00\n      26   6 110 3.90 2.875  0  1    4    4    1     6  2 qsec  17.02\n      27   6 110 3.90 2.875  0  1    4    4    1     6  2 disp 160.00\n      28   4  95 3.92 3.150  1  0    4    2    0     4  9  mpg  22.80\n      29   4  95 3.92 3.150  1  0    4    2    0     4  9 qsec  22.90\n      30   4  95 3.92 3.150  1  0    4    2    0     4  9 disp 140.80\n      31   4  62 3.69 3.190  1  0    4    2    0     4  8  mpg  24.40\n      32   4  62 3.69 3.190  1  0    4    2    0     4  8 qsec  20.00\n      33   4  62 3.69 3.190  1  0    4    2    0     4  8 disp 146.70\n      34   6 105 2.76 3.460  1  0    3    1    0     6  6  mpg  18.10\n      35   6 105 2.76 3.460  1  0    3    1    0     6  6 qsec  20.22\n      36   6 105 2.76 3.460  1  0    3    1    0     6  6 disp 225.00\n\n"
  },
  {
    "path": "tests/testthat/_snaps/data_to_numeric.md",
    "content": "# convert data frame to numeric\n\n    Code\n      to_numeric(head(ToothGrowth), dummy_factors = TRUE)\n    Output\n         len supp.OJ supp.VC dose\n      1  4.2       0       1  0.5\n      2 11.5       0       1  0.5\n      3  7.3       0       1  0.5\n      4  5.8       0       1  0.5\n      5  6.4       0       1  0.5\n      6 10.0       0       1  0.5\n\n---\n\n    Code\n      to_numeric(head(ToothGrowth), dummy_factors = FALSE)\n    Output\n         len supp dose\n      1  4.2    2  0.5\n      2 11.5    2  0.5\n      3  7.3    2  0.5\n      4  5.8    2  0.5\n      5  6.4    2  0.5\n      6 10.0    2  0.5\n\n# convert factor to numeric\n\n    Code\n      to_numeric(f, dummy_factors = TRUE)\n    Output\n         a c i s t\n      1  0 0 0 1 0\n      2  0 0 0 0 1\n      3  1 0 0 0 0\n      4  0 0 0 0 1\n      5  0 0 1 0 0\n      6  0 0 0 1 0\n      7  0 0 0 0 1\n      8  0 0 1 0 0\n      9  0 1 0 0 0\n      10 0 0 0 1 0\n\n"
  },
  {
    "path": "tests/testthat/_snaps/demean.md",
    "content": "# demean works\n\n    Code\n      head(x)\n    Output\n        Sepal.Length_between Petal.Length_between Sepal.Length_within\n      1             5.925000             3.527500          -0.8250000\n      2             5.925000             3.527500          -1.0250000\n      3             5.925000             3.527500          -1.2250000\n      4             5.862222             3.951111          -1.2622222\n      5             5.925000             3.527500          -0.9250000\n      6             5.862222             3.951111          -0.4622222\n        Petal.Length_within\n      1           -2.127500\n      2           -2.127500\n      3           -2.227500\n      4           -2.451111\n      5           -2.127500\n      6           -2.251111\n\n---\n\n    Code\n      head(x)\n    Output\n        Sepal.Length_between binary_between Species_between Species_setosa_between\n      1             5.925000          0.375        0.850000              0.4250000\n      2             5.925000          0.375        0.850000              0.4250000\n      3             5.925000          0.375        0.850000              0.4250000\n      4             5.862222          0.400        1.133333              0.2888889\n      5             5.925000          0.375        0.850000              0.4250000\n      6             5.862222          0.400        1.133333              0.2888889\n        Species_versicolor_between Species_virginica_between Sepal.Length_within\n      1                  0.3000000                 0.2750000          -0.8250000\n      2                  0.3000000                 0.2750000          -1.0250000\n      3                  0.3000000                 0.2750000          -1.2250000\n      4                  0.2888889                 0.4222222          -1.2622222\n      5                  0.3000000                 0.2750000          -0.9250000\n      6                  0.2888889                 0.4222222          -0.4622222\n        
binary_within Species_within Species_setosa_within Species_versicolor_within\n      1        -0.375      -0.850000             0.5750000                -0.3000000\n      2         0.625      -0.850000             0.5750000                -0.3000000\n      3        -0.375      -0.850000             0.5750000                -0.3000000\n      4         0.600      -1.133333             0.7111111                -0.2888889\n      5         0.625      -0.850000             0.5750000                -0.3000000\n      6        -0.400      -1.133333             0.7111111                -0.2888889\n        Species_virginica_within\n      1               -0.2750000\n      2               -0.2750000\n      3               -0.2750000\n      4               -0.4222222\n      5               -0.2750000\n      6               -0.4222222\n\n---\n\n    Code\n      head(x)\n    Output\n        Sepal.Length Sepal.Width Petal.Length Petal.Width Species ID binary\n      1          5.1         3.5          1.4         0.2  setosa  3      0\n      2          4.9         3.0          1.4         0.2  setosa  3      1\n      3          4.7         3.2          1.3         0.2  setosa  3      0\n      4          4.6         3.1          1.5         0.2  setosa  2      1\n      5          5.0         3.6          1.4         0.2  setosa  3      1\n      6          5.4         3.9          1.7         0.4  setosa  2      0\n        Sepal.Length_between Petal.Length_between Sepal.Length_within\n      1             5.925000             3.527500          -0.8250000\n      2             5.925000             3.527500          -1.0250000\n      3             5.925000             3.527500          -1.2250000\n      4             5.862222             3.951111          -1.2622222\n      5             5.925000             3.527500          -0.9250000\n      6             5.862222             3.951111          -0.4622222\n        Petal.Length_within\n      1           -2.127500\n      2           
-2.127500\n      3           -2.227500\n      4           -2.451111\n      5           -2.127500\n      6           -2.251111\n\n# demean interaction term\n\n    Code\n      demean(dat, select = c(\"a\", \"x*y\"), by = \"ID\", append = FALSE)\n    Output\n        a_between x_y_between   a_within x_y_within\n      1  2.666667    4.666667 -1.6666667 -0.6666667\n      2  2.333333    4.000000 -0.3333333  2.0000000\n      3  2.500000    4.500000  0.5000000 -1.5000000\n      4  2.666667    4.666667  1.3333333  3.3333333\n      5  2.333333    4.000000 -1.3333333  0.0000000\n      6  2.500000    4.500000 -0.5000000  1.5000000\n      7  2.666667    4.666667  0.3333333 -2.6666667\n      8  2.333333    4.000000  1.6666667 -2.0000000\n\n"
  },
  {
    "path": "tests/testthat/_snaps/describe_distribution.md",
    "content": "# describe_distribution - factor\n\n    Code\n      describe_distribution(factor(substring(\"statistics\", 1:10, 1:10)))\n    Output\n      Mean | SD |  Range | Skewness | Kurtosis |  n | n_Missing\n      ---------------------------------------------------------\n           |    | [a, t] |    -0.77 |    -0.13 | 10 |         0\n\n# describe_distribution - character\n\n    Code\n      describe_distribution(as.character(ToothGrowth$supp))\n    Output\n      Mean | SD |    Range | Skewness | Kurtosis |  n | n_Missing\n      -----------------------------------------------------------\n           |    | [VC, OJ] |        0 |    -2.07 | 60 |         0\n\n# describe_distribution - grouped df\n\n    Code\n      out\n    Output\n      Species    |     Variable | Mean |   SD |  IQR |        Range | Skewness\n      ------------------------------------------------------------------------\n      setosa     | Petal.Length | 1.46 | 0.17 | 0.20 | [1.00, 1.90] |     0.11\n      setosa     |  Petal.Width | 0.25 | 0.11 | 0.10 | [0.10, 0.60] |     1.25\n      versicolor | Petal.Length | 4.26 | 0.47 | 0.60 | [3.00, 5.10] |    -0.61\n      versicolor |  Petal.Width | 1.33 | 0.20 | 0.30 | [1.00, 1.80] |    -0.03\n      virginica  | Petal.Length | 5.55 | 0.55 | 0.80 | [4.50, 6.90] |     0.55\n      virginica  |  Petal.Width | 2.03 | 0.27 | 0.50 | [1.40, 2.50] |    -0.13\n      \n      Species    | Kurtosis |  n | n_Missing\n      --------------------------------------\n      setosa     |     1.02 | 50 |         0\n      setosa     |     1.72 | 50 |         0\n      versicolor |     0.05 | 50 |         0\n      versicolor |    -0.41 | 50 |         0\n      virginica  |    -0.15 | 50 |         0\n      virginica  |    -0.60 | 50 |         0\n\n# describe_distribution - grouped df and multiple groups\n\n    Code\n      describe_distribution(x)\n    Output\n      grp1 | grp2 | Variable |  Mean |    SD |   IQR |          Range | Skewness\n      
--------------------------------------------------------------------------\n      a    |    a |   values | 10.00 |  6.48 | 12.00 |  [1.00, 19.00] |     0.00\n      b    |    a |   values | 13.86 | 10.92 | 21.00 |  [1.00, 28.00] |     0.23\n      c    |    a |   values | 20.50 |  5.61 | 10.50 | [13.00, 28.00] |     0.00\n      a    |    b |   values | 11.00 |  6.48 | 12.00 |  [2.00, 20.00] |     0.00\n      b    |    b |   values | 15.50 | 11.81 | 22.50 |  [2.00, 29.00] |     0.00\n      c    |    b |   values | 20.00 |  6.48 | 12.00 | [11.00, 29.00] |     0.00\n      a    |    c |   values | 10.50 |  5.61 | 10.50 |  [3.00, 18.00] |     0.00\n      b    |    c |   values | 17.14 | 10.92 | 21.00 |  [3.00, 30.00] |    -0.23\n      c    |    c |   values | 21.00 |  6.48 | 12.00 | [12.00, 30.00] |     0.00\n      \n      grp1 | Kurtosis | n | n_Missing\n      -------------------------------\n      a    |    -1.20 | 7 |         0\n      b    |    -2.14 | 7 |         0\n      c    |    -1.20 | 6 |         0\n      a    |    -1.20 | 7 |         0\n      b    |    -2.76 | 6 |         0\n      c    |    -1.20 | 7 |         0\n      a    |    -1.20 | 6 |         0\n      b    |    -2.14 | 7 |         0\n      c    |    -1.20 | 7 |         0\n\n# describe_distribution formatting\n\n    Code\n      format(x)\n    Output\n      Mean |   SD |  IQR |        Range |  Quartiles | Skewness | Kurtosis |   n | n_Missing\n      --------------------------------------------------------------------------------------\n      3.06 | 0.44 | 0.52 | [2.00, 4.40] | 2.80, 3.30 |     0.32 |     0.23 | 150 |         0\n\n# (multiple) centralities with CIs\n\n    Code\n      print(out, table_width = Inf)\n    Output\n      Median | 95% CI (Median) |  MAD | Mean | 95% CI (Mean) |   SD |  MAP | 95% CI (MAP) |  IQR |        Range | Skewness | Kurtosis |   n | n_Missing\n      
-------------------------------------------------------------------------------------------------------------------------------------------------\n           3 |    [3.00, 3.10] | 0.44 | 3.06 |  [3.00, 3.13] | 0.44 | 3.00 | [2.94, 3.07] | 0.52 | [2.00, 4.40] |     0.32 |     0.23 | 150 |         0\n\n---\n\n    Code\n      print(out, table_width = Inf)\n    Output\n      Mean | 95% CI (Mean) |   SD |  IQR |        Range | Skewness | Kurtosis |   n | n_Missing\n      -----------------------------------------------------------------------------------------\n      3.06 |  [3.00, 3.13] | 0.44 | 0.52 | [2.00, 4.40] |     0.32 |     0.23 | 150 |         0\n\n---\n\n    Code\n      print(out, table_width = Inf)\n    Output\n      Median | 95% CI (Median) |  MAD |  MAP | 95% CI (MAP) |  IQR |        Range | Skewness | Kurtosis |   n | n_Missing\n      -------------------------------------------------------------------------------------------------------------------\n           3 |    [3.00, 3.10] | 0.44 | 3.00 | [2.91, 3.09] | 0.52 | [2.00, 4.40] |     0.32 |     0.23 | 150 |         0\n\n# display() method exports to markdown\n\n    Code\n      display(out)\n    Output\n      \n      \n      |Variable     | Mean |   SD |  IQR |        Range | Skewness | Kurtosis |   n | n_Missing |\n      |:------------|:----:|:----:|:----:|:------------:|:--------:|:--------:|:---:|:---------:|\n      |Sepal.Length | 5.84 | 0.83 | 1.30 | (4.30, 7.90) |     0.31 |    -0.55 | 150 |         0 |\n      |Sepal.Width  | 3.06 | 0.44 | 0.52 | (2.00, 4.40) |     0.32 |     0.23 | 150 |         0 |\n      |Petal.Length | 3.76 | 1.77 | 3.52 | (1.00, 6.90) |    -0.27 |    -1.40 | 150 |         0 |\n      |Petal.Width  | 1.20 | 0.76 | 1.50 | (0.10, 2.50) |    -0.10 |    -1.34 | 150 |         0 |\n\n# display() method exports to tinytable\n\n    Code\n      display(out, format = \"tt\")\n    Output\n      \n      
+--------------+------+------+------+--------------+----------+----------+-----+-----------+\n      | Variable     | Mean | SD   | IQR  | Range        | Skewness | Kurtosis | n   | n_Missing |\n      +==============+======+======+======+==============+==========+==========+=====+===========+\n      | Sepal.Length | 5.84 | 0.83 | 1.30 | (4.30, 7.90) | 0.31     | -0.55    | 150 | 0         |\n      +--------------+------+------+------+--------------+----------+----------+-----+-----------+\n      | Sepal.Width  | 3.06 | 0.44 | 0.52 | (2.00, 4.40) | 0.32     | 0.23     | 150 | 0         |\n      +--------------+------+------+------+--------------+----------+----------+-----+-----------+\n      | Petal.Length | 3.76 | 1.77 | 3.52 | (1.00, 6.90) | -0.27    | -1.40    | 150 | 0         |\n      +--------------+------+------+------+--------------+----------+----------+-----+-----------+\n      | Petal.Width  | 1.20 | 0.76 | 1.50 | (0.10, 2.50) | -0.10    | -1.34    | 150 | 0         |\n      +--------------+------+------+------+--------------+----------+----------+-----+-----------+ \n\n"
  },
  {
    "path": "tests/testthat/_snaps/empty-dataframe.md",
    "content": "# remove empty with character\n\n    Code\n      remove_empty_columns(tmp)\n    Output\n         a  b  d\n      1  1  1  1\n      2  2 NA NA\n      3  3  3  3\n      4 NA NA NA\n      5  5  5  5\n\n---\n\n    Code\n      remove_empty_rows(tmp)\n    Output\n        a  b  c  d\n      1 1  1 NA  1\n      2 2 NA NA NA\n      3 3  3 NA  3\n      5 5  5 NA  5\n\n---\n\n    Code\n      remove_empty(tmp)\n    Output\n        a  b  d\n      1 1  1  1\n      2 2 NA NA\n      3 3  3  3\n      5 5  5  5\n\n"
  },
  {
    "path": "tests/testthat/_snaps/means_by_group.md",
    "content": "# mean_by_group\n\n    Code\n      means_by_group(efc, \"c12hour\", \"e42dep\")\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           95% CI |      p\n      ----------------------------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | [-68.46, 102.46] | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | [-26.18,  94.68] | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | [ 29.91,  75.59] | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 \n      Total                |  86.46 | 97 | 66.40 |                  |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc, \"c12hour\", \"e42dep\", ci = 0.99)\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           99% CI |      p\n      ----------------------------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | [-96.17, 130.17] | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | [-45.77, 114.27] | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | [ 22.50,  83.00] | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | [ 86.80, 127.13] | 0.001 \n      Total                |  86.46 | 97 | 66.40 |                  |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc, \"c12hour\", \"e42dep\", ci = NA)\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |      p\n      ---------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | 0.573 \n      slightly dependent   |  34.25 |  4 | 
29.97 | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | 0.001 \n      Total                |  86.46 | 97 | 66.40 |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc, c(\"neg_c_7\", \"c12hour\"), \"e42dep\")\n    Output\n      # Mean of Negative impact with 7 items by elder's dependency\n      \n      Category             |  Mean |  N |   SD |         95% CI |     p\n      -----------------------------------------------------------------\n      independent          | 11.00 |  2 | 0.00 | [ 5.00, 17.00] | 0.567\n      slightly dependent   | 10.00 |  4 | 3.16 | [ 5.76, 14.24] | 0.296\n      moderately dependent | 13.71 | 28 | 3.14 | [12.11, 15.32] | 0.296\n      severely dependent   | 14.67 | 60 | 4.78 | [13.57, 15.76] | 0.108\n      Total                | 14.11 | 94 | 4.34 |                |      \n      \n      Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118\n      \n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           95% CI |      p\n      ----------------------------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | [-68.46, 102.46] | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | [-26.18,  94.68] | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | [ 29.91,  75.59] | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 \n      Total                |  86.46 | 97 | 66.40 |                  |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc, c(\"neg_c_7\", \"c12hour\"), \"e42dep\", ci = NA)\n    Output\n      # Mean of Negative impact with 7 items by elder's dependency\n      \n      Category             |  Mean |  N |   SD |     p\n      
------------------------------------------------\n      independent          | 11.00 |  2 | 0.00 | 0.567\n      slightly dependent   | 10.00 |  4 | 3.16 | 0.296\n      moderately dependent | 13.71 | 28 | 3.14 | 0.296\n      severely dependent   | 14.67 | 60 | 4.78 | 0.108\n      Total                | 14.11 | 94 | 4.34 |      \n      \n      Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118\n      \n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |      p\n      ---------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | 0.001 \n      Total                |  86.46 | 97 | 66.40 |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc, c(\"neg_c_7\", \"c12hour\"), \"e42dep\", ci = 0.99)\n    Output\n      # Mean of Negative impact with 7 items by elder's dependency\n      \n      Category             |  Mean |  N |   SD |         99% CI |     p\n      -----------------------------------------------------------------\n      independent          | 11.00 |  2 | 0.00 | [ 3.05, 18.95] | 0.567\n      slightly dependent   | 10.00 |  4 | 3.16 | [ 4.38, 15.62] | 0.296\n      moderately dependent | 13.71 | 28 | 3.14 | [11.59, 15.84] | 0.296\n      severely dependent   | 14.67 | 60 | 4.78 | [13.22, 16.12] | 0.108\n      Total                | 14.11 | 94 | 4.34 |                |      \n      \n      Anova: R2=0.063; adj.R2=0.032; F=2.009; p=0.118\n      \n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           99% CI |      p\n      ----------------------------------------------------------------------\n      independent       
   |  17.00 |  2 | 11.31 | [-96.17, 130.17] | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | [-45.77, 114.27] | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | [ 22.50,  83.00] | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | [ 86.80, 127.13] | 0.001 \n      Total                |  86.46 | 97 | 66.40 |                  |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc$c12hour, efc$e42dep)\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           95% CI |      p\n      ----------------------------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | [-68.46, 102.46] | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | [-26.18,  94.68] | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | [ 29.91,  75.59] | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | [ 91.74, 122.19] | 0.001 \n      Total                |  86.46 | 97 | 66.40 |                  |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n---\n\n    Code\n      means_by_group(efc$c12hour, efc$e42dep, ci = NA)\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |      p\n      ---------------------------------------------------\n      independent          |  17.00 |  2 | 11.31 | 0.573 \n      slightly dependent   |  34.25 |  4 | 29.97 | 0.626 \n      moderately dependent |  52.75 | 28 | 51.83 | > .999\n      severely dependent   | 106.97 | 63 | 65.88 | 0.001 \n      Total                |  86.46 | 97 | 66.40 |       \n      \n      Anova: R2=0.186; adj.R2=0.160; F=7.098; p<.001\n\n"
  },
  {
    "path": "tests/testthat/_snaps/normalize.md",
    "content": "# normalize work as expected\n\n    Code\n      head(normalize(trees))\n    Output\n             Girth     Height      Volume\n      1 0.00000000 0.29166667 0.001497006\n      2 0.02439024 0.08333333 0.001497006\n      3 0.04065041 0.00000000 0.000000000\n      4 0.17886179 0.37500000 0.092814371\n      5 0.19512195 0.75000000 0.128742515\n      6 0.20325203 0.83333333 0.142215569\n\n"
  },
  {
    "path": "tests/testthat/_snaps/print.dw_transformer.md",
    "content": "# print.dw_transformer\n\n    Code\n      rescale(iris$Sepal.Length)\n    Output\n        [1]  22.222222  16.666667  11.111111   8.333333  19.444444  30.555556\n        [7]   8.333333  19.444444   2.777778  16.666667  30.555556  13.888889\n       [13]  13.888889   0.000000  41.666667  38.888889  30.555556  22.222222\n       [19]  38.888889  22.222222  30.555556  22.222222   8.333333  22.222222\n       [25]  13.888889  19.444444  19.444444  25.000000  25.000000  11.111111\n       [31]  13.888889  30.555556  25.000000  33.333333  16.666667  19.444444\n       [37]  33.333333  16.666667   2.777778  22.222222  19.444444   5.555556\n       [43]   2.777778  19.444444  22.222222  13.888889  22.222222   8.333333\n       [49]  27.777778  19.444444  75.000000  58.333333  72.222222  33.333333\n       [55]  61.111111  38.888889  55.555556  16.666667  63.888889  25.000000\n       [61]  19.444444  44.444444  47.222222  50.000000  36.111111  66.666667\n       [67]  36.111111  41.666667  52.777778  36.111111  44.444444  50.000000\n       [73]  55.555556  50.000000  58.333333  63.888889  69.444444  66.666667\n       [79]  47.222222  38.888889  33.333333  33.333333  41.666667  47.222222\n       [85]  30.555556  47.222222  66.666667  55.555556  36.111111  33.333333\n       [91]  33.333333  50.000000  41.666667  19.444444  36.111111  38.888889\n       [97]  38.888889  52.777778  22.222222  38.888889  55.555556  41.666667\n      [103]  77.777778  55.555556  61.111111  91.666667  16.666667  83.333333\n      [109]  66.666667  80.555556  61.111111  58.333333  69.444444  38.888889\n      [115]  41.666667  58.333333  61.111111  94.444444  94.444444  47.222222\n      [121]  72.222222  36.111111  94.444444  55.555556  66.666667  80.555556\n      [127]  52.777778  50.000000  58.333333  80.555556  86.111111 100.000000\n      [133]  58.333333  55.555556  50.000000  94.444444  55.555556  58.333333\n      [139]  47.222222  72.222222  66.666667  72.222222  41.666667  69.444444\n      
[145]  66.666667  66.666667  55.555556  61.111111  52.777778  44.444444\n      (original range = 4.3 to 7.9)\n\n---\n\n    Code\n      normalize(iris$Sepal.Length)\n    Output\n        [1] 0.22222222 0.16666667 0.11111111 0.08333333 0.19444444 0.30555556\n        [7] 0.08333333 0.19444444 0.02777778 0.16666667 0.30555556 0.13888889\n       [13] 0.13888889 0.00000000 0.41666667 0.38888889 0.30555556 0.22222222\n       [19] 0.38888889 0.22222222 0.30555556 0.22222222 0.08333333 0.22222222\n       [25] 0.13888889 0.19444444 0.19444444 0.25000000 0.25000000 0.11111111\n       [31] 0.13888889 0.30555556 0.25000000 0.33333333 0.16666667 0.19444444\n       [37] 0.33333333 0.16666667 0.02777778 0.22222222 0.19444444 0.05555556\n       [43] 0.02777778 0.19444444 0.22222222 0.13888889 0.22222222 0.08333333\n       [49] 0.27777778 0.19444444 0.75000000 0.58333333 0.72222222 0.33333333\n       [55] 0.61111111 0.38888889 0.55555556 0.16666667 0.63888889 0.25000000\n       [61] 0.19444444 0.44444444 0.47222222 0.50000000 0.36111111 0.66666667\n       [67] 0.36111111 0.41666667 0.52777778 0.36111111 0.44444444 0.50000000\n       [73] 0.55555556 0.50000000 0.58333333 0.63888889 0.69444444 0.66666667\n       [79] 0.47222222 0.38888889 0.33333333 0.33333333 0.41666667 0.47222222\n       [85] 0.30555556 0.47222222 0.66666667 0.55555556 0.36111111 0.33333333\n       [91] 0.33333333 0.50000000 0.41666667 0.19444444 0.36111111 0.38888889\n       [97] 0.38888889 0.52777778 0.22222222 0.38888889 0.55555556 0.41666667\n      [103] 0.77777778 0.55555556 0.61111111 0.91666667 0.16666667 0.83333333\n      [109] 0.66666667 0.80555556 0.61111111 0.58333333 0.69444444 0.38888889\n      [115] 0.41666667 0.58333333 0.61111111 0.94444444 0.94444444 0.47222222\n      [121] 0.72222222 0.36111111 0.94444444 0.55555556 0.66666667 0.80555556\n      [127] 0.52777778 0.50000000 0.58333333 0.80555556 0.86111111 1.00000000\n      [133] 0.58333333 0.55555556 0.50000000 0.94444444 0.55555556 0.58333333\n      
[139] 0.47222222 0.72222222 0.66666667 0.72222222 0.41666667 0.69444444\n      [145] 0.66666667 0.66666667 0.55555556 0.61111111 0.52777778 0.44444444\n      (original range = 4.3 to 7.9)\n\n---\n\n    Code\n      center(iris$Sepal.Length)\n    Output\n        [1] -0.74333333 -0.94333333 -1.14333333 -1.24333333 -0.84333333 -0.44333333\n        [7] -1.24333333 -0.84333333 -1.44333333 -0.94333333 -0.44333333 -1.04333333\n       [13] -1.04333333 -1.54333333 -0.04333333 -0.14333333 -0.44333333 -0.74333333\n       [19] -0.14333333 -0.74333333 -0.44333333 -0.74333333 -1.24333333 -0.74333333\n       [25] -1.04333333 -0.84333333 -0.84333333 -0.64333333 -0.64333333 -1.14333333\n       [31] -1.04333333 -0.44333333 -0.64333333 -0.34333333 -0.94333333 -0.84333333\n       [37] -0.34333333 -0.94333333 -1.44333333 -0.74333333 -0.84333333 -1.34333333\n       [43] -1.44333333 -0.84333333 -0.74333333 -1.04333333 -0.74333333 -1.24333333\n       [49] -0.54333333 -0.84333333  1.15666667  0.55666667  1.05666667 -0.34333333\n       [55]  0.65666667 -0.14333333  0.45666667 -0.94333333  0.75666667 -0.64333333\n       [61] -0.84333333  0.05666667  0.15666667  0.25666667 -0.24333333  0.85666667\n       [67] -0.24333333 -0.04333333  0.35666667 -0.24333333  0.05666667  0.25666667\n       [73]  0.45666667  0.25666667  0.55666667  0.75666667  0.95666667  0.85666667\n       [79]  0.15666667 -0.14333333 -0.34333333 -0.34333333 -0.04333333  0.15666667\n       [85] -0.44333333  0.15666667  0.85666667  0.45666667 -0.24333333 -0.34333333\n       [91] -0.34333333  0.25666667 -0.04333333 -0.84333333 -0.24333333 -0.14333333\n       [97] -0.14333333  0.35666667 -0.74333333 -0.14333333  0.45666667 -0.04333333\n      [103]  1.25666667  0.45666667  0.65666667  1.75666667 -0.94333333  1.45666667\n      [109]  0.85666667  1.35666667  0.65666667  0.55666667  0.95666667 -0.14333333\n      [115] -0.04333333  0.55666667  0.65666667  1.85666667  1.85666667  0.15666667\n      [121]  1.05666667 -0.24333333  
1.85666667  0.45666667  0.85666667  1.35666667\n      [127]  0.35666667  0.25666667  0.55666667  1.35666667  1.55666667  2.05666667\n      [133]  0.55666667  0.45666667  0.25666667  1.85666667  0.45666667  0.55666667\n      [139]  0.15666667  1.05666667  0.85666667  1.05666667 -0.04333333  0.95666667\n      [145]  0.85666667  0.85666667  0.45666667  0.65666667  0.35666667  0.05666667\n      (center: 5.8, scale = 1)\n\n---\n\n    Code\n      standardize(iris$Sepal.Length)\n    Output\n        [1] -0.89767388 -1.13920048 -1.38072709 -1.50149039 -1.01843718 -0.53538397\n        [7] -1.50149039 -1.01843718 -1.74301699 -1.13920048 -0.53538397 -1.25996379\n       [13] -1.25996379 -1.86378030 -0.05233076 -0.17309407 -0.53538397 -0.89767388\n       [19] -0.17309407 -0.89767388 -0.53538397 -0.89767388 -1.50149039 -0.89767388\n       [25] -1.25996379 -1.01843718 -1.01843718 -0.77691058 -0.77691058 -1.38072709\n       [31] -1.25996379 -0.53538397 -0.77691058 -0.41462067 -1.13920048 -1.01843718\n       [37] -0.41462067 -1.13920048 -1.74301699 -0.89767388 -1.01843718 -1.62225369\n       [43] -1.74301699 -1.01843718 -0.89767388 -1.25996379 -0.89767388 -1.50149039\n       [49] -0.65614727 -1.01843718  1.39682886  0.67224905  1.27606556 -0.41462067\n       [55]  0.79301235 -0.17309407  0.55148575 -1.13920048  0.91377565 -0.77691058\n       [61] -1.01843718  0.06843254  0.18919584  0.30995914 -0.29385737  1.03453895\n       [67] -0.29385737 -0.05233076  0.43072244 -0.29385737  0.06843254  0.30995914\n       [73]  0.55148575  0.30995914  0.67224905  0.91377565  1.15530226  1.03453895\n       [79]  0.18919584 -0.17309407 -0.41462067 -0.41462067 -0.05233076  0.18919584\n       [85] -0.53538397  0.18919584  1.03453895  0.55148575 -0.29385737 -0.41462067\n       [91] -0.41462067  0.30995914 -0.05233076 -1.01843718 -0.29385737 -0.17309407\n       [97] -0.17309407  0.43072244 -0.89767388 -0.17309407  0.55148575 -0.05233076\n      [103]  1.51759216  0.55148575  0.79301235  2.12140867 
-1.13920048  1.75911877\n      [109]  1.03453895  1.63835547  0.79301235  0.67224905  1.15530226 -0.17309407\n      [115] -0.05233076  0.67224905  0.79301235  2.24217198  2.24217198  0.18919584\n      [121]  1.27606556 -0.29385737  2.24217198  0.55148575  1.03453895  1.63835547\n      [127]  0.43072244  0.30995914  0.67224905  1.63835547  1.87988207  2.48369858\n      [133]  0.67224905  0.55148575  0.30995914  2.24217198  0.55148575  0.67224905\n      [139]  0.18919584  1.27606556  1.03453895  1.27606556 -0.05233076  1.15530226\n      [145]  1.03453895  1.03453895  0.55148575  0.79301235  0.43072244  0.06843254\n      (center: 5.8, scale = 0.83)\n\n"
  },
  {
    "path": "tests/testthat/_snaps/ranktransform.md",
    "content": "# ranktransform works with data frames\n\n    Code\n      ranktransform(BOD)\n    Output\n        Time demand\n      1    1      1\n      2    2      2\n      3    3      5\n      4    4      4\n      5    5      3\n      6    6      6\n\n"
  },
  {
    "path": "tests/testthat/_snaps/rescale_weights.md",
    "content": "# rescale_weights works as expected\n\n    Code\n      head(rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\"))\n    Output\n        total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR rescaled_weights_a\n      1     1 2.20        1        3       2       31 97593.68          1.5733612\n      2     7 2.08        2        3       1       29 39599.36          0.6231745\n      3     3 1.48        2        1       2       42 26619.83          0.8976966\n      4     4 1.32        2        4       2       33 34998.53          0.7083628\n      5     1 2.00        2        1       1       41 14746.45          0.4217782\n      6     6 2.20        2        4       1       38 28232.10          0.6877550\n        rescaled_weights_b\n      1          1.2005159\n      2          0.5246593\n      3          0.5439111\n      4          0.5498944\n      5          0.3119698\n      6          0.5155503\n\n---\n\n    Code\n      head(rescale_weights(nhanes_sample, \"WTINT2YR\", c(\"SDMVSTRA\", \"SDMVPSU\")))\n    Output\n        total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweight_a_SDMVSTRA\n      1     1 2.20        1        3       2       31 97593.68          1.5733612\n      2     7 2.08        2        3       1       29 39599.36          0.6231745\n      3     3 1.48        2        1       2       42 26619.83          0.8976966\n      4     4 1.32        2        4       2       33 34998.53          0.7083628\n      5     1 2.00        2        1       1       41 14746.45          0.4217782\n      6     6 2.20        2        4       1       38 28232.10          0.6877550\n        pweight_b_SDMVSTRA pweight_a_SDMVPSU pweight_b_SDMVPSU\n      1          1.2005159         1.8458164         1.3699952\n      2          0.5246593         0.8217570         0.5780808\n      3          0.5439111         0.5034683         0.3736824\n      4          0.5498944         0.6619369         0.4913004\n      5          0.3119698         0.3060151         
0.2152722\n      6          0.5155503         0.5858662         0.4121388\n\n---\n\n    Code\n      head(rescale_weights(nhanes_sample, probability_weights = \"WTINT2YR\", method = \"kish\"))\n    Output\n        total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR rescaled_weights\n      1     1 2.20        1        3       2       31 97593.68        1.3952529\n      2     7 2.08        2        3       1       29 39599.36        0.5661343\n      3     3 1.48        2        1       2       42 26619.83        0.3805718\n      4     4 1.32        2        4       2       33 34998.53        0.5003582\n      5     1 2.00        2        1       1       41 14746.45        0.2108234\n      6     6 2.20        2        4       1       38 28232.10        0.4036216\n\n---\n\n    Code\n      rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\")\n    Output\n         total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR rescaled_weights_a\n      1      1 2.20        1        3       2       31 97593.68          1.0000000\n      2      7 2.08        2        3       1       29 39599.36          0.5819119\n      3      3 1.48        2        1       2       42       NA                 NA\n      4      4 1.32        2        4       2       33 34998.53          0.6766764\n      5      1 2.00        2        1       1       41 14746.45          0.7471696\n      6      6 2.20        2        4       1       38 28232.10          1.0000000\n      7    350 1.60        1        3       2       33 93162.43          1.8012419\n      8     NA 1.48        2        3       1       29 82275.99          1.2090441\n      9      3 2.28        2        4       1       41 24726.39          1.2528304\n      10    30 0.84        1        3       2       35       NA                 NA\n      11    70 1.24        1        4       2       33 27002.70          0.5220817\n      12     5 1.68        2        1       2       39 18792.03          1.0000000\n      13    60 2.20        1        3  
     2       30 76894.56          1.0000000\n      14     2 1.48        2        3       1       29       NA                 NA\n      15     8 2.36        2        3       2       39       NA                 NA\n      16     3 2.04        2        3       2       36 98200.91          1.0000000\n      17     1 2.08        1        3       1       40 87786.09          1.0000000\n      18     7 1.00        1        3       2       32 90803.16          1.0000000\n      19     9 2.28        2        3       2       34       NA                 NA\n      20     2 1.24        2        3       1       29 82275.99          1.2090441\n         rescaled_weights_b\n      1           1.0000000\n      2           0.5351412\n      3                  NA\n      4           0.5107078\n      5           0.7022777\n      6           1.0000000\n      7           1.3594509\n      8           1.1118681\n      9           1.1775572\n      10                 NA\n      11          0.3940306\n      12          1.0000000\n      13          1.0000000\n      14                 NA\n      15                 NA\n      16          1.0000000\n      17          1.0000000\n      18          1.0000000\n      19                 NA\n      20          1.1118681\n\n---\n\n    Code\n      rescale_weights(nhanes_sample, \"WTINT2YR\", method = \"kish\")\n    Output\n         total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR rescaled_weights\n      1      1 2.20        1        3       2       31 97593.68        1.2734329\n      2      7 2.08        2        3       1       29 39599.36        0.5167049\n      3      3 1.48        2        1       2       42       NA               NA\n      4      4 1.32        2        4       2       33 34998.53        0.4566718\n      5      1 2.00        2        1       1       41 14746.45        0.1924164\n      6      6 2.20        2        4       1       38 28232.10        0.3683813\n      7    350 1.60        1        3       2       33 93162.43        
1.2156126\n      8     NA 1.48        2        3       1       29 82275.99        1.0735629\n      9      3 2.28        2        4       1       41 24726.39        0.3226377\n      10    30 0.84        1        3       2       35       NA               NA\n      11    70 1.24        1        4       2       33 27002.70        0.3523397\n      12     5 1.68        2        1       2       39 18792.03        0.2452044\n      13    60 2.20        1        3       2       30 76894.56        1.0033444\n      14     2 1.48        2        3       1       29       NA               NA\n      15     8 2.36        2        3       2       39       NA               NA\n      16     3 2.04        2        3       2       36 98200.91        1.2813563\n      17     1 2.08        1        3       1       40 87786.09        1.1454605\n      18     7 1.00        1        3       2       32 90803.16        1.1848281\n      19     9 2.28        2        3       2       34       NA               NA\n      20     2 1.24        2        3       1       29 82275.99        1.0735629\n\n# rescale_weights nested works as expected\n\n    Code\n      rescale_weights(data = head(nhanes_sample, n = 30), by = c(\"SDMVSTRA\",\n        \"SDMVPSU\"), probability_weights = \"WTINT2YR\", nest = TRUE)\n    Output\n         total  age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA  WTINT2YR rescaled_weights_a\n      1      1 2.20        1        3       2       31 97593.679          1.0000000\n      2      7 2.08        2        3       1       29 39599.363          0.5502486\n      3      3 1.48        2        1       2       42 26619.834          0.9512543\n      4      4 1.32        2        4       2       33 34998.530          0.6766764\n      5      1 2.00        2        1       1       41 14746.454          0.7147710\n      6      6 2.20        2        4       1       38 28232.100          1.0000000\n      7    350 1.60        1        3       2       33 93162.431          1.8012419\n      8     NA 1.48  
      2        3       1       29 82275.986          1.1432570\n      9      3 2.28        2        4       1       41 24726.391          1.1985056\n      10    30 0.84        1        3       2       35 39895.048          1.0000000\n      11    70 1.24        1        4       2       33 27002.703          0.5220817\n      12     5 1.68        2        1       2       39 18792.034          0.3866720\n      13    60 2.20        1        3       2       30 76894.563          1.0000000\n      14     2 1.48        2        3       1       29 82275.986          1.1432570\n      15     8 2.36        2        3       2       39 78406.811          1.6133280\n      16     3 2.04        2        3       2       36 98200.912          1.0000000\n      17     1 2.08        1        3       1       40 87786.091          1.0000000\n      18     7 1.00        1        3       2       32 90803.158          1.2693642\n      19     9 2.28        2        3       2       34 45002.917          1.0000000\n      20     2 1.24        2        3       1       29 82275.986          1.1432570\n      21     4 2.28        2        3       1       34 91437.145          1.4088525\n      22     3 1.04        1        1       2       42 29348.027          1.0487457\n      23     4 1.12        1        1       1       34 38366.567          0.5911475\n      24     1 1.52        2        1       1       42  6622.334          1.0000000\n      25    22 2.24        1        4       1       41 22420.209          1.0867233\n      26     7 1.00        2        3       2       41 65529.204          1.0000000\n      27     5 0.92        2        4       1       30 27089.745          1.0000000\n      28    15 1.04        1        3       2       32 52265.570          0.7306358\n      29     3 0.80        1        3       1       33 64789.307          1.0000000\n      30     1 1.00        1        3       1       29 73404.222          1.0199804\n         rescaled_weights_b\n      1           1.0000000\n      2 
          0.5226284\n      3           0.9489993\n      4           0.5107078\n      5           0.6854605\n      6           1.0000000\n      7           1.3594509\n      8           1.0858702\n      9           1.1493587\n      10          1.0000000\n      11          0.3940306\n      12          0.2809766\n      13          1.0000000\n      14          1.0858702\n      15          1.1723308\n      16          1.0000000\n      17          1.0000000\n      18          1.1834934\n      19          1.0000000\n      20          1.0858702\n      21          1.2070771\n      22          1.0462596\n      23          0.5064835\n      24          1.0000000\n      25          1.0421602\n      26          1.0000000\n      27          1.0000000\n      28          0.6812093\n      29          1.0000000\n      30          0.9687816\n\n"
  },
  {
    "path": "tests/testthat/_snaps/reshape_ci.md",
    "content": "# reshape_ci with single CI level\n\n    Code\n      df_reshape\n    Output\n        Parameter CI_low CI_high  CI\n      1    Term 1    0.2     0.5 0.8\n\n# reshape_ci with multiple CI levels\n\n    Code\n      reshape_ci(x)\n    Output\n        Parameter CI_low_0.8 CI_high_0.8 CI_low_0.9 CI_high_0.9\n      1    Term 1        0.2         0.5       0.10        0.80\n      2    Term 2        0.3         0.6       0.15        0.85\n\n---\n\n    Code\n      reshape_ci(reshape_ci(x))\n    Output\n        Parameter  CI CI_low CI_high\n      1    Term 1 0.8   0.20    0.50\n      2    Term 1 0.9   0.10    0.80\n      3    Term 2 0.8   0.30    0.60\n      4    Term 2 0.9   0.15    0.85\n\n"
  },
  {
    "path": "tests/testthat/_snaps/skewness-kurtosis.md",
    "content": "# skewness works with data frames\n\n    Code\n      skewness(iris[, 1:4])\n    Output\n      Parameter    | Skewness |    SE\n      -------------------------------\n      Sepal.Length |    0.315 | 0.196\n      Sepal.Width  |    0.319 | 0.196\n      Petal.Length |   -0.275 | 0.196\n      Petal.Width  |   -0.103 | 0.196\n\n---\n\n    Code\n      skewness(iris[, 1:4], iterations = 100)\n    Output\n      Parameter    | Skewness |    SE\n      -------------------------------\n      Sepal.Length |    0.315 | 0.126\n      Sepal.Width  |    0.319 | 0.175\n      Petal.Length |   -0.275 | 0.137\n      Petal.Width  |   -0.103 | 0.134\n\n# kurtosis works with data frames\n\n    Code\n      kurtosis(iris[, 1:4])\n    Output\n      Parameter    | Kurtosis |    SE\n      -------------------------------\n      Sepal.Length |   -0.552 | 0.381\n      Sepal.Width  |    0.228 | 0.381\n      Petal.Length |   -1.402 | 0.381\n      Petal.Width  |   -1.341 | 0.381\n\n---\n\n    Code\n      kurtosis(iris[, 1:4], iterations = 100)\n    Output\n      Parameter    | Kurtosis |    SE\n      -------------------------------\n      Sepal.Length |   -0.552 | 0.188\n      Sepal.Width  |    0.228 | 0.351\n      Petal.Length |   -1.402 | 0.167\n      Petal.Width  |   -1.341 | 0.115\n\n# skewness works with matrices\n\n    Code\n      skewness(as.matrix(iris[, 1:4]))\n    Output\n      Parameter    | Skewness |    SE\n      -------------------------------\n      Sepal.Length |    0.315 | 0.196\n      Sepal.Width  |    0.319 | 0.196\n      Petal.Length |   -0.275 | 0.196\n      Petal.Width  |   -0.103 | 0.196\n\n---\n\n    Code\n      skewness(as.matrix(iris[, 1:4]), iterations = 100)\n    Output\n      Parameter    | Skewness |    SE\n      -------------------------------\n      Sepal.Length |    0.315 | 0.126\n      Sepal.Width  |    0.319 | 0.175\n      Petal.Length |   -0.275 | 0.137\n      Petal.Width  |   -0.103 | 0.134\n\n# kurtosis works with matrices\n\n    Code\n      
kurtosis(as.matrix(iris[, 1:4]))\n    Output\n      Parameter    | Kurtosis |    SE\n      -------------------------------\n      Sepal.Length |   -0.552 | 0.381\n      Sepal.Width  |    0.228 | 0.381\n      Petal.Length |   -1.402 | 0.381\n      Petal.Width  |   -1.341 | 0.381\n\n---\n\n    Code\n      kurtosis(as.matrix(iris[, 1:4]), iterations = 100)\n    Output\n      Parameter    | Kurtosis |    SE\n      -------------------------------\n      Sepal.Length |   -0.552 | 0.188\n      Sepal.Width  |    0.228 | 0.351\n      Petal.Length |   -1.402 | 0.167\n      Petal.Width  |   -1.341 | 0.115\n\n"
  },
  {
    "path": "tests/testthat/_snaps/smoothness.md",
    "content": "# smoothness works with data frames\n\n    Code\n      smoothness(BOD)\n    Output\n             Parameter                    \n      Time   \"Time\"    \"0.986393923832144\"\n      demand \"demand\"  \"0.406270770677043\"\n      attr(,\"class\")\n      [1] \"parameters_smoothness\" \"matrix\"                \"array\"                \n\n"
  },
  {
    "path": "tests/testthat/_snaps/text_format.md",
    "content": "# text formatting helpers work as expected\n\n    Code\n      text_format(c(\"A very long First\", \"Some similar long Second\", \"Shorter Third\",\n        \"More or less long Fourth\", \"And finally the Last\"), width = 20)\n    Output\n      [1] \"A very long First,\\nSome similar long\\nSecond, Shorter\\nThird, More or less\\nlong Fourth and And\\nfinally the Last\\n\"\n\n---\n\n    Code\n      text_format(c(\"A very long First\", \"Some similar long Second\", \"Shorter Third\",\n        \"More or less long Fourth\", \"And finally the Last\"), last = \" or \", enclose = \"`\",\n      width = 20)\n    Output\n      [1] \"`A very long\\nFirst`, `Some\\nsimilar long\\nSecond`, `Shorter\\nThird`, `More or\\nless long Fourth`\\nor `And finally the\\nLast`\\n\"\n\n# text formatters respect `width` argument\n\n    Code\n      long_text <- strrep(\"abc \", 100)\n      cat(text_format(long_text, width = 50))\n    Output\n       abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc\n    Code\n      cat(text_format(long_text, width = 80))\n    Output\n       abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc\n    Code\n      withr::with_options(list(width = 50), code = {\n        
cat(text_format(long_text))\n      })\n    Output\n       abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc abc abc abc abc abc abc abc abc\n      abc abc abc abc\n\n"
  },
  {
    "path": "tests/testthat/_snaps/windows/means_by_group.md",
    "content": "# mean_by_group, weighted\n\n    Code\n      means_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\")\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |           95% CI |      p\n      ----------------------------------------------------------------------\n      independent          |  16.92 |  3 | 11.31 | [-60.82,  94.66] | 0.486 \n      slightly dependent   |  33.56 |  4 | 29.75 | [-26.93,  94.05] | 0.593 \n      moderately dependent |  52.74 | 26 | 54.44 | [ 28.71,  76.76] | 0.996 \n      severely dependent   | 108.08 | 67 | 65.40 | [ 93.01, 123.16] | < .001\n      Total                |  88.11 | 97 | 67.01 |                  |       \n      \n      Anova: R2=0.191; adj.R2=0.165; F=7.329; p<.001\n\n---\n\n    Code\n      means_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\", ci = NA)\n    Output\n      # Mean of average number of hours of care per week by elder's dependency\n      \n      Category             |   Mean |  N |    SD |      p\n      ---------------------------------------------------\n      independent          |  16.92 |  3 | 11.31 | 0.486 \n      slightly dependent   |  33.56 |  4 | 29.75 | 0.593 \n      moderately dependent |  52.74 | 26 | 54.44 | 0.996 \n      severely dependent   | 108.08 | 67 | 65.40 | < .001\n      Total                |  88.11 | 97 | 67.01 |       \n      \n      Anova: R2=0.191; adj.R2=0.165; F=7.329; p<.001\n\n"
  },
  {
    "path": "tests/testthat/_snaps/winsorization.md",
    "content": "# with missing values\n\n    Code\n      suppressWarnings(head(winsorize(na.omit(ggplot2::msleep$brainwt))))\n    Output\n      [1] 0.0155 0.0024 0.1750 0.0700 0.0982 0.1150\n\n"
  },
  {
    "path": "tests/testthat/helper-state.R",
    "content": "testthat::set_state_inspector(function() {\n  # sometimes a dependency might add a custom option, so we need to\n  # make sure we don't fail because of such additions\n  options <- options()\n\n  # Result of `dput(names(options()))`\n  base_options <- c(\n    \"add.smooth\",\n    \"askpass\",\n    \"asksecret\",\n    \"bitmapType\",\n    \"browser\",\n    \"browserNLdisabled\",\n    \"buildtools.check\",\n    \"buildtools.with\",\n    \"callr.condition_handler_cli_message\",\n    \"CBoundsCheck\",\n    \"check.bounds\",\n    \"citation.bibtex.max\",\n    \"connectionObserver\",\n    \"continue\",\n    \"contrasts\",\n    \"defaultPackages\",\n    \"demo.ask\",\n    \"deparse.cutoff\",\n    \"deparse.max.lines\",\n    \"device\",\n    \"device.ask.default\",\n    \"digits\",\n    \"download.file.method\",\n    \"dvipscmd\",\n    \"echo\",\n    \"editor\",\n    \"encoding\",\n    \"example.ask\",\n    \"expressions\",\n    \"ggvis.renderer\",\n    \"help_type\",\n    \"help.search.types\",\n    \"help.try.all.packages\",\n    \"HTTPUserAgent\",\n    \"install.packages.compile.from.source\",\n    \"internet.info\",\n    \"keep.parse.data\",\n    \"keep.parse.data.pkgs\",\n    \"keep.source\",\n    \"keep.source.pkgs\",\n    \"locatorBell\",\n    \"mailer\",\n    \"matprod\",\n    \"max.contour.segments\",\n    \"max.print\",\n    \"menu.graphics\",\n    \"na.action\",\n    \"nwarnings\",\n    \"OutDec\",\n    \"page_viewer\",\n    \"pager\",\n    \"papersize\",\n    \"PCRE_limit_recursion\",\n    \"PCRE_study\",\n    \"PCRE_use_JIT\",\n    \"pdfviewer\",\n    \"pkgType\",\n    \"plumber.docs.callback\",\n    \"plumber.swagger.url\",\n    \"printcmd\",\n    \"profvis.keep_output\",\n    \"profvis.print\",\n    \"profvis.prof_extension\",\n    \"profvis.prof_output\",\n    \"prompt\",\n    \"repos\",\n    \"restart\",\n    \"reticulate.initialized\",\n    \"reticulate.repl.busy\",\n    \"reticulate.repl.hook\",\n    \"reticulate.repl.initialize\",\n    
\"reticulate.repl.teardown\",\n    \"rl_word_breaks\",\n    \"rsconnect.check.certificate\",\n    \"rstudio.notebook.executing\",\n    \"RStudioGD.antialias\",\n    \"RStudioGD.backend\",\n    \"scipen\",\n    \"shiny.launch.browser\",\n    \"shinygadgets.showdialog\",\n    \"show.coef.Pvalues\",\n    \"show.error.messages\",\n    \"show.signif.stars\",\n    \"showErrorCalls\",\n    \"showNCalls\",\n    \"showWarnCalls\",\n    \"str\",\n    \"str.dendrogram.last\",\n    \"terminal.manager\",\n    \"texi2dvi\",\n    \"timeout\",\n    \"ts.eps\",\n    \"ts.S.compat\",\n    \"unzip\",\n    \"useFancyQuotes\",\n    \"verbose\",\n    \"viewer\",\n    \"warn\",\n    \"warning.length\",\n    \"warnPartialMatchArgs\",\n    \"warnPartialMatchAttr\",\n    \"warnPartialMatchDollar\",\n    \"width\"\n  )\n  options <- options[base_options]\n\n  list(\n    attached = search(),\n    connections = nrow(showConnections()),\n    cwd = getwd(),\n    envvars = Sys.getenv(),\n    libpaths = .libPaths(),\n    locale = Sys.getlocale(),\n    options = options,\n    packages = .packages(all.available = TRUE),\n    NULL\n  )\n})\n"
  },
  {
    "path": "tests/testthat/helper.R",
    "content": "if (insight::check_if_installed(\"poorman\", stop = FALSE)) {\n  `%>%` <- poorman::`%>%`\n}\n"
  },
  {
    "path": "tests/testthat/test-adjust.R",
    "content": "test_that(\"adjust multilevel\", {\n  skip_if_not_installed(\"lme4\")\n  adj <- adjust(\n    iris[c(\"Sepal.Length\", \"Species\")],\n    multilevel = TRUE,\n    bayesian = FALSE\n  )\n  # High tolerance to avoid issues on some R CMD check specification, see #592\n  expect_equal(\n    head(adj$Sepal.Length),\n    c(0.08698, -0.11302, -0.31302, -0.41302, -0.01302, 0.38698),\n    tolerance = 1e-1\n  )\n})\n\ntest_that(\"adjust\", {\n  adj <- adjust(\n    iris[c(\"Sepal.Length\", \"Species\")],\n    multilevel = FALSE,\n    bayesian = FALSE\n  )\n  expect_equal(\n    head(adj$Sepal.Length),\n    c(0.094, -0.106, -0.306, -0.406, -0.006, 0.394),\n    tolerance = 1e-3\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"adjust regex\", {\n  expect_identical(\n    adjust(mtcars, select = \"pg\", regex = TRUE),\n    adjust(mtcars, select = \"mpg\")\n  )\n  expect_identical(\n    adjust(mtcars, select = \"pg$\", regex = TRUE),\n    adjust(mtcars, select = \"mpg\")\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"adjust, invalid column names\", {\n  data(iris)\n  colnames(iris)[1] <- \"I am\"\n  expect_error(\n    adjust(iris[c(\"I am\", \"Species\")], multilevel = FALSE, bayesian = FALSE),\n    regex = \"Bad column names\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-assign_labels.R",
    "content": "test_that(\"assign_labels, unnamed values\", {\n  x <- 1:3\n  # labelling by providing required number of labels\n  out <- assign_labels(\n    x,\n    variable = \"My x\",\n    values = c(\"one\", \"two\", \"three\")\n  )\n  expect_identical(attributes(out)$label, \"My x\")\n  expect_identical(\n    attributes(out)$labels,\n    structure(1:3, names = c(\"one\", \"two\", \"three\"))\n  )\n})\n\ntest_that(\"assign_labels, named values\", {\n  # labelling using named vectors\n  x <- factor(letters[1:3])\n  out <- assign_labels(\n    x,\n    variable = \"Labelled factor\",\n    values = c(a = \"low\", b = \"mid\", c = \"high\")\n  )\n  expect_identical(attributes(out)$label, \"Labelled factor\")\n  expect_identical(attributes(out)$labels, c(low = \"a\", mid = \"b\", high = \"c\"))\n})\n\ntest_that(\"assign_labels, partially named values\", {\n  x <- 1:5\n  out <- assign_labels(\n    x,\n    variable = \"My x\",\n    values = c(`1` = \"lowest\", `5` = \"highest\"),\n    verbose = FALSE\n  )\n  expect_identical(attributes(out)$label, \"My x\")\n  expect_identical(attributes(out)$labels, c(lowest = 1, highest = 5))\n})\n\ntest_that(\"assign_labels, errors\", {\n  x <- 1:5\n  expect_error(assign_labels(x, values = c(`1` = \"lowest\", `6` = \"highest\")))\n  expect_error(assign_labels(\n    x,\n    variable = 1,\n    values = c(`1` = \"lowest\", `6` = \"highest\")\n  ))\n  expect_error(assign_labels(x, values = c(\"a\", \"b\", \"c\")))\n})\n\ntest_that(\"assign_labels, data frame\", {\n  data(iris)\n  out <- assign_labels(iris, \"Species\", values = c(\"a\", \"b\", \"c\"))\n  expect_identical(\n    attributes(out$Species)$labels,\n    c(a = \"setosa\", b = \"versicolor\", c = \"virginica\")\n  )\n\n  data(mtcars)\n  out <- assign_labels(\n    mtcars,\n    select = c(\"am\", \"vs\"),\n    values = c(\"low\", \"high\")\n  )\n  expect_identical(attributes(out$am)$labels, c(low = 0, high = 1))\n  expect_identical(attributes(out$vs)$labels, c(low = 0, high = 
1))\n  expect_null(attributes(out$gear)$labels)\n  expect_null(attributes(out$cyl)$labels)\n})\n"
  },
  {
    "path": "tests/testthat/test-attributes-grouped-df.R",
    "content": "# data_arrange -----------------------------------\n\ntest_that(\"data_arrange, attributes preserved\", {\n  # if dplyr:::`[.grouped_df` in the environment it destroys the attributes\n  # (only occurs when we run tests in random order)\n  skip_if(\"[.grouped_df\" %in% methods(`[`))\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_arrange(data_group(x, \"cyl\"), \"hp\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# rescale -----------------------------------\n\ntest_that(\"rescale, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- rescale(data_group(x, \"Species\"), 1:3)\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# center -----------------------------------\n\ntest_that(\"center, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- center(data_group(x, \"Species\"), \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# categorize -----------------------------------\n\ntest_that(\"categorize, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- categorize(data_group(x, \"Species\"), \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# standardize -----------------------------------\n\ntest_that(\"standardize, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- standardize(data_group(x, \"Species\"), \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n# filter -----------------------------------\n\ntest_that(\"filter, attributes preserved\", {\n  # if dplyr:::`[.grouped_df` in the environment it destroys the attributes\n  # (only occurs when we run tests in random order)\n  skip_if(\"[.grouped_df\" %in% methods(`[`))\n  test <- data.frame(\n    id = c(1, 1, 2, 2),\n    x = c(0, 1, 3, 4)\n 
 )\n  attr(test, \"myattri\") <- \"I'm here\"\n  test2 <- data_filter(data_group(test, \"id\"), x == min(x))\n  expect_identical(attr(test2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n"
  },
  {
    "path": "tests/testthat/test-attributes.R",
    "content": "data(efc, package = \"datawizard\")\n\n# data_filter -----------------------------------\n\ntest_that(\"data_filter, attributes preserved\", {\n  attr(efc, \"myattri\") <- \"I'm here\"\n  x <- data_filter(efc, c172code == 1 & c12hour > 40)\n  expect_identical(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE)\n  )\n  expect_identical(\n    attr(x, \"myattri\", exact = TRUE),\n    \"I'm here\"\n  )\n})\n\n\n# data_arrange -----------------------------------\n\ntest_that(\"data_arrange, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_arrange(x, \"hp\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_match -----------------------------------\n\ntest_that(\"data_match, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_match(x, data.frame(vs = 0, am = 1))\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_select -----------------------------------\n\ntest_that(\"data_select, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_select(x, \"hp\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_group -----------------------------------\n\ntest_that(\"data_group, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_group(x, \"cyl\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_relocate -----------------------------------\n\ntest_that(\"data_relocate, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_relocate(x, \"am\", \"mpg\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_remove -----------------------------------\n\ntest_that(\"data_remove, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") 
<- \"I'm here\"\n  x2 <- data_remove(x, \"am\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_reorder -----------------------------------\n\ntest_that(\"data_reorder, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_reorder(x, c(\"hp\", \"vs\", \"wt\"))\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# data_to_long -----------------------------------\n\ntest_that(\"data_to_long, attributes preserved\", {\n  wide_data <- data.frame(replicate(5, rnorm(10)))\n  attr(wide_data, \"myattri\") <- \"I'm here\"\n  x2 <- data_to_long(wide_data)\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# to_numeric -----------------------------------\n\ntest_that(\"to_numeric, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- to_numeric(x, \"Species\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# convert_to_na -----------------------------------\n\ntest_that(\"convert_to_na, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- convert_to_na(x, na = 2, verbose = FALSE)\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n  # label attribute is preserved\n  attr(x$Species, \"label\") <- \"Species Variable\"\n  x2 <- convert_to_na(x, na = \"setosa\", drop_levels = TRUE, verbose = FALSE)\n  expect_identical(attributes(x$Species)$label, \"Species Variable\")\n})\n\n\n# data_rename -----------------------------------\n\ntest_that(\"data_rename, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- data_rename(x, select = \"hp\", replacement = \"horsepower\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# rescale -----------------------------------\n\ntest_that(\"rescale, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm 
here\"\n  x2 <- rescale(x, 1:3)\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# center -----------------------------------\n\ntest_that(\"center, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- center(x, \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# categorize -----------------------------------\n\ntest_that(\"categorize, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- categorize(x, \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# change_code -----------------------------------\n\ntest_that(\"recode_values, attributes preserved\", {\n  x <- mtcars\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- recode_values(x, select = \"am\", recode = list(`5` = 0, `10` = 1))\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n\n\n# standardize -----------------------------------\n\ntest_that(\"standardize, attributes preserved\", {\n  x <- iris\n  attr(x, \"myattri\") <- \"I'm here\"\n  x2 <- standardize(x, \"Sepal.Width\")\n  expect_identical(attr(x2, \"myattri\", exact = TRUE), \"I'm here\")\n})\n"
  },
  {
    "path": "tests/testthat/test-categorize.R",
    "content": "set.seed(123)\nd <- sample.int(10, size = 500, replace = TRUE)\n\ntest_that(\"recode median\", {\n  expect_identical(categorize(d), ifelse(d >= median(d), 2, 1))\n  expect_identical(categorize(d, lowest = 0), as.numeric(d >= median(d)))\n})\n\ntest_that(\"recode mean\", {\n  expect_identical(categorize(d, split = \"mean\"), ifelse(d >= mean(d), 2, 1))\n  expect_identical(\n    categorize(d, split = \"mean\", lowest = 0),\n    as.numeric(d >= mean(d))\n  )\n})\n\ntest_that(\"recode quantile\", {\n  expect_error(categorize(d, split = \"quantile\"))\n\n  q <- quantile(d, probs = c(1 / 3, 2 / 3, 1))\n  f <- cut(\n    d,\n    breaks = unique(c(min(d), q, max(d))),\n    include.lowest = TRUE,\n    right = FALSE\n  )\n  levels(f) <- 1:nlevels(f)\n  expect_identical(\n    categorize(d, split = \"quantile\", n_groups = 3),\n    as.numeric(f)\n  )\n  expect_identical(\n    categorize(d, split = \"quantile\", n_groups = 3, lowest = 0),\n    as.numeric(f) - 1\n  )\n})\n\nset.seed(123)\nd <- sample.int(100, size = 1000, replace = TRUE)\n\ntest_that(\"recode range\", {\n  expect_error(categorize(d, split = \"range\"))\n  d2 <- d\n  d2[d <= 20] <- 1\n  d2[d > 20 & d <= 40] <- 2\n  d2[d > 40 & d <= 60] <- 3\n  d2[d > 60 & d <= 80] <- 4\n  d2[d > 80] <- 5\n  expect_equal(\n    table(categorize(d, split = \"equal_range\", range = 20)),\n    table(d2),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    table(categorize(\n      d,\n      split = \"equal_range\",\n      range = 20,\n      lowest = 1\n    )),\n    table(d2),\n    ignore_attr = TRUE\n  )\n\n  d2 <- d\n  d2[d < 20] <- 0\n  d2[d >= 20 & d < 40] <- 1\n  d2[d >= 40 & d < 60] <- 2\n  d2[d >= 60 & d < 80] <- 3\n  d2[d >= 80] <- 4\n  expect_equal(\n    table(categorize(\n      d,\n      split = \"equal_range\",\n      range = 20,\n      lowest = 0\n    )),\n    table(d2),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"recode length\", {\n  expect_error(categorize(d, split = \"equal_length\"))\n  d2 <- d\n  d2[d 
<= 20] <- 1\n  d2[d > 20 & d <= 40] <- 2\n  d2[d > 40 & d <= 60] <- 3\n  d2[d > 60 & d <= 80] <- 4\n  d2[d > 80] <- 5\n  expect_equal(\n    table(categorize(d, split = \"equal_length\", n_groups = 5)),\n    table(d2),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    table(categorize(\n      d,\n      split = \"equal_length\",\n      n_groups = 5,\n      lowest = 1\n    )),\n    table(d2),\n    ignore_attr = TRUE\n  )\n})\n\nset.seed(123)\nx <- sample.int(10, size = 30, replace = TRUE)\ntest_that(\"recode factor labels\", {\n  expect_type(categorize(x, \"equal_length\", n_groups = 3), \"double\")\n  expect_s3_class(\n    categorize(\n      x,\n      \"equal_length\",\n      n_groups = 3,\n      labels = c(\"low\", \"mid\", \"high\")\n    ),\n    \"factor\"\n  )\n  expect_identical(\n    levels(categorize(\n      x,\n      \"equal_length\",\n      n_groups = 3,\n      labels = c(\"low\", \"mid\", \"high\")\n    )),\n    c(\"low\", \"mid\", \"high\")\n  )\n  t1 <- table(categorize(x, \"equal_length\", n_groups = 3))\n  t2 <- table(categorize(\n    x,\n    \"equal_length\",\n    n_groups = 3,\n    labels = c(\"low\", \"mid\", \"high\")\n  ))\n  expect_equal(t1, t2, ignore_attr = TRUE)\n})\n\ntest_that(\"recode data frame\", {\n  data(iris)\n  x <- iris\n  out <- categorize(\n    x,\n    split = \"median\",\n    select = c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n  expect_s3_class(out, \"data.frame\")\n  expect_identical(\n    out$Sepal.Length,\n    ifelse(iris$Sepal.Length >= median(iris$Sepal.Length), 2, 1)\n  )\n  expect_identical(out$Petal.Length, iris$Petal.Length)\n\n  out <- categorize(x, split = \"median\", select = starts_with(\"Sepal\"))\n  expect_s3_class(out, \"data.frame\")\n  expect_identical(\n    out$Sepal.Length,\n    ifelse(iris$Sepal.Length >= median(iris$Sepal.Length), 2, 1)\n  )\n  expect_identical(out$Petal.Length, iris$Petal.Length)\n\n  out <- categorize(x, split = \"median\", select = ~ Sepal.Width + Sepal.Length)\n  expect_s3_class(out, 
\"data.frame\")\n  expect_identical(\n    out$Sepal.Length,\n    ifelse(iris$Sepal.Length >= median(iris$Sepal.Length), 2, 1)\n  )\n  expect_identical(out$Petal.Length, iris$Petal.Length)\n\n  out <- categorize(x, split = \"median\", select = Sepal.Length)\n  expect_s3_class(out, \"data.frame\")\n  expect_identical(\n    out$Sepal.Length,\n    ifelse(iris$Sepal.Length >= median(iris$Sepal.Length), 2, 1)\n  )\n  expect_identical(out$Petal.Length, iris$Petal.Length)\n\n  expect_warning(\n    expect_warning(\n      out <- categorize(\n        x,\n        split = \"median\",\n        select = c(\"sepal.Length\", \"sepal.Width\"),\n        ignore_case = FALSE\n      ),\n      \"not found\"\n    ),\n    \"not found\"\n  )\n  expect_identical(out$Sepal.Length, iris$Sepal.Length)\n\n  out <- categorize(\n    x,\n    split = \"median\",\n    select = starts_with(\"sepal\"),\n    ignore_case = TRUE\n  )\n  expect_s3_class(out, \"data.frame\")\n  expect_identical(\n    out$Sepal.Length,\n    ifelse(iris$Sepal.Length >= median(iris$Sepal.Length), 2, 1)\n  )\n  expect_identical(out$Petal.Length, iris$Petal.Length)\n\n  out <- categorize(\n    x,\n    split = \"median\",\n    select = starts_with(\"sepal\"),\n    ignore_case = FALSE\n  )\n  expect_identical(out$Sepal.Length, iris$Sepal.Length)\n\n  out <- categorize(\n    x,\n    split = \"median\",\n    select = starts_with(\"sepal\"),\n    ignore_case = TRUE,\n    append = \"_r\"\n  )\n  expect_identical(\n    colnames(out),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_r\",\n      \"Sepal.Width_r\"\n    )\n  )\n\n  out <- categorize(iris, split = \"median\", select = starts_with(\"Sepal\"))\n  expect_identical(\n    out$Sepal.Length,\n    c(\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      2,\n      1,\n      1,\n      1,\n      
1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      2,\n      1,\n      2,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      1,\n      1,\n      1,\n      1,\n      2,\n      1,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2\n    )\n  )\n\n  skip_if_not_installed(\"poorman\")\n\n  x <- poorman::group_by(iris, Species)\n  out <- categorize(x, split = \"median\", select = starts_with(\"Sepal\"))\n  expect_identical(\n    out$Sepal.Length,\n    c(\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      1,\n      2,\n      1,\n      1,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      2,\n      1,\n      1,\n      2,\n      2,\n      1,\n      1,\n      2,\n      2,\n      1,\n   
   2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      2,\n      1,\n      2,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      1,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      1,\n      1,\n      1,\n      1,\n      2,\n      1,\n      2,\n      2,\n      2,\n      1,\n      1,\n      1,\n      2,\n      1,\n      1,\n      1,\n      1,\n      1,\n      2,\n      1,\n      1,\n      1,\n      1,\n      2,\n      1,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      2,\n      1,\n      2,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      1,\n      1,\n      2,\n      1,\n      1,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      2,\n      2,\n      1,\n      2,\n      1,\n      1\n    )\n  )\n})\n\n\ntest_that(\"recode all NA\", {\n  x <- rep(NA, 10)\n  expect_message(\n    y <- categorize(x),\n    \"can't be recoded\"\n  )\n  expect_identical(y, x)\n\n  x <- rep(NA_real_, 10)\n  expect_message(\n    y <- categorize(x),\n    \"only missing values\"\n  )\n  expect_identical(y, x)\n})\n\n\ntest_that(\"recode numeric\", {\n  expect_identical(\n    categorize(mtcars$hp, split = c(100, 150)),\n    c(\n      2,\n      2,\n      1,\n      2,\n      3,\n      2,\n      3,\n      1,\n      1,\n      2,\n      2,\n      3,\n      3,\n      3,\n      3,\n      3,\n      3,\n      1,\n      1,\n      1,\n      1,\n      3,\n      3,\n      3,\n      3,\n      1,\n      1,\n      2,\n      3,\n      3,\n      3,\n      2\n    )\n  )\n  x <- mtcars$hp\n  x[mtcars$hp < 100] <- 1\n  x[mtcars$hp >= 100 & mtcars$hp < 150] <- 2\n  x[mtcars$hp >= 150] <- 3\n  expect_identical(categorize(mtcars$hp, split = c(100, 150)), x)\n  
expect_identical(categorize(mtcars$hp, split = c(100, 150), lowest = NULL), x)\n\n  expect_identical(\n    categorize(mtcars$hp, split = \"equal_range\", range = 50, lowest = NULL),\n    c(\n      2,\n      2,\n      1,\n      2,\n      3,\n      2,\n      4,\n      1,\n      1,\n      2,\n      2,\n      3,\n      3,\n      3,\n      4,\n      4,\n      4,\n      1,\n      1,\n      1,\n      1,\n      2,\n      2,\n      4,\n      3,\n      1,\n      1,\n      2,\n      5,\n      3,\n      6,\n      2\n    )\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"categorize regex\", {\n  expect_identical(\n    categorize(mtcars, select = \"pg\", regex = TRUE),\n    categorize(mtcars, select = \"mpg\")\n  )\n})\n\n\n# labelling ranges ------------------------------\ntest_that(\"categorize labelling ranged\", {\n  data(mtcars)\n  expect_snapshot(categorize(mtcars$mpg, \"equal_length\", n_groups = 5))\n  expect_snapshot(categorize(\n    mtcars$mpg,\n    \"equal_length\",\n    n_groups = 5,\n    labels = \"range\"\n  ))\n  expect_snapshot(categorize(\n    mtcars$mpg,\n    \"equal_length\",\n    n_groups = 5,\n    labels = \"observed\"\n  ))\n})\n\ntest_that(\"categorize breaks\", {\n  data(mtcars)\n  expect_snapshot(categorize(\n    mtcars$mpg,\n    \"equal_length\",\n    n_groups = 5,\n    labels = \"range\",\n    breaks = \"inclusive\"\n  ))\n  expect_error(\n    categorize(mtcars$mpg, \"equal_length\", n_groups = 5, breaks = \"something\"),\n    regex = \"should be one of\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-center.R",
    "content": "test_that(\"center\", {\n  z <- center(iris$Sepal.Width)\n  expect_equal(\n    as.vector(z),\n    iris$Sepal.Width - mean(iris$Sepal.Width),\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"center, robust\", {\n  z <- center(mtcars$hp, robust = TRUE)\n  expect_equal(\n    as.vector(z),\n    mtcars$hp - median(mtcars$hp),\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"center, select\", {\n  z <- center(iris, select = \"Sepal.Width\")\n  expect_equal(\n    as.vector(z$Sepal.Width),\n    iris$Sepal.Width - mean(iris$Sepal.Width),\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  # check class attributes\n  expect_identical(\n    vapply(z, class, character(1)),\n    c(\n      Sepal.Length = \"numeric\",\n      Sepal.Width = \"numeric\",\n      Petal.Length = \"numeric\",\n      Petal.Width = \"numeric\",\n      Species = \"factor\"\n    )\n  )\n})\n\ntest_that(\"center, factors\", {\n  z <- center(iris, select = \"Species\")\n  expect_identical(z$Species, iris$Species)\n})\n\ntest_that(\"center, force factors\", {\n  z <- center(iris, select = \"Species\", force = TRUE)\n  v <- as.numeric(iris$Species)\n  expect_equal(\n    as.vector(z$Species),\n    v - median(v),\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"center, all na\", {\n  z <- center(c(NA, NA, NA))\n  expect_identical(z, c(NA, NA, NA))\n})\n\ntest_that(\"center, with Inf\", {\n  z <- center(c(2, 4, Inf))\n  expect_equal(z, c(-1, 1, NA), ignore_attr = TRUE)\n})\n\ntest_that(\"center, all NA or Inf\", {\n  z <- center(c(NA, -Inf, Inf))\n  expect_equal(z, c(NA, -Inf, Inf), ignore_attr = TRUE)\n})\n\ntest_that(\"center works correctly with only one value\", {\n  expect_message(\n    x <- center(100), # nolint\n    \"will be set to 0\"\n  )\n  expect_equal(x, 0, ignore_attr = TRUE)\n\n  expect_equal(center(100, center = 1), 99, ignore_attr = TRUE)\n  expect_equal(\n    center(100, reference = mtcars$mpg),\n    100 - 
mean(mtcars$mpg),\n    ignore_attr = TRUE\n  )\n})\n\n\n# with grouped data -------------------------------------------\n\ntest_that(\"center (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    center(Sepal.Width) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  manual <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(Sepal.Width = Sepal.Width - mean(Sepal.Width)) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard, manual)\n})\n\ntest_that(\"center (grouped data), with force = TRUE\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard_c <- iris %>%\n    poorman::group_by(Species) %>%\n    center(force = TRUE) %>%\n    poorman::ungroup()\n\n  manual_c <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(\n      Sepal.Length = Sepal.Length - mean(Sepal.Length),\n      Sepal.Width = Sepal.Width - mean(Sepal.Width),\n      Petal.Length = Petal.Length - mean(Petal.Length),\n      Petal.Width = Petal.Width - mean(Petal.Width)\n    ) %>%\n    poorman::ungroup()\n\n  expect_equal(datawizard_c, manual_c, ignore_attr = TRUE)\n})\n\ntest_that(\"center, robust (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    center(Sepal.Width, robust = TRUE) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  manual <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(Sepal.Width = Sepal.Width - median(Sepal.Width)) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard, manual)\n})\n\ntest_that(\"center, select (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    center(select = starts_with(\"Sepal\\\\.W\")) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  manual <- iris 
%>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(Sepal.Width = Sepal.Width - mean(Sepal.Width)) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard, manual)\n})\n\ntest_that(\"center, factors (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    center(select = \"Species\") %>%\n    poorman::ungroup() %>%\n    poorman::pull(Species)\n\n  manual <- poorman::pull(iris, Species)\n\n  expect_identical(datawizard, manual)\n})\n\n# select helpers ------------------------------\ntest_that(\"center regex\", {\n  expect_equal(\n    center(mtcars, select = \"pg\", regex = TRUE)$mpg,\n    center(mtcars$mpg),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    center(mtcars, select = \"pg$\", regex = TRUE)$mpg,\n    center(mtcars$mpg),\n    ignore_attr = TRUE\n  )\n})\n\n# no matches ------------------------------\ntest_that(\"center no match\", {\n  data(iris)\n  expect_warning(center(iris, \"Sepla.Length\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-coef_var.R",
    "content": "test_that(\"coefficient of variation works\", {\n  expect_equal(coef_var(1:10), 0.5504818826)\n  expect_equal(coef_var(1:10, method = \"unbiased\"), 0.5552700246)\n  expect_equal(coef_var(c(1:10, 100), method = \"median_mad\"), 0.7413)\n  expect_equal(coef_var(c(1:10, 100), method = \"qcd\"), 0.4166666667)\n  expect_identical(coef_var(mu = 10, sigma = 20), 2)\n  expect_equal(\n    coef_var(mu = 10, sigma = 20, method = \"unbiased\", n = 30),\n    2.250614348\n  )\n  expect_equal(distribution_coef_var(1:10), 0.5504818826)\n})\n\n\ntest_that(\"coef_var returns NULL if can't compute\", {\n  expect_warning(\n    {\n      x <- coef_var(as.Date(\"2022-10-31\"))\n    },\n    \"Can't compute\"\n  )\n  expect_null(x)\n})\n\n\ntest_that(\"coef_var: argument 'remove_na' works\", {\n  expect_identical(coef_var(c(1:10, NA)), NA_real_)\n\n  expect_identical(\n    coef_var(1:10),\n    coef_var(c(1:10, NA), remove_na = TRUE)\n  )\n})\n\ntest_that(\"coef_var: method 'unbiased' needs argument 'n' when sigma and mu are provided\", {\n  expect_error(\n    coef_var(1:10, method = \"unbiased\", mu = 10, sigma = 20),\n    \"A value for `n` must be provided\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-contr.deviation.R",
    "content": "test_that(\"contr.deviation\", {\n  c.treatment <- solve(cbind(Intercept = 1, contr.treatment(3)))\n  c.sum <- solve(cbind(Intercept = 1, contr.sum(3)))\n  c.deviation <- solve(cbind(Intercept = 1, contr.deviation(3)))\n\n  expect_equal(c.deviation[1, ], c.sum[1, ])\n  expect_equal(c.deviation[-1, ], c.treatment[-1, ])\n})\n\ntest_that(\"contr.deviation | snapshot\", {\n  skip_if_not_installed(\"base\", \"4.3\")\n  # IF THIS TESTS FAILS, UPDATE THE EXAMPLE\n\n  data(\"mtcars\")\n  mtcars <- data_modify(mtcars, cyl = factor(cyl))\n  mtcars <- data_modify(mtcars, am = factor(am))\n  mtcars <- data_arrange(mtcars, select = c(\"cyl\", \"am\"))\n\n  contrasts(mtcars$cyl) <- contr.deviation\n  c.deviation <- cbind(Intercept = 1, contrasts(mtcars$cyl))\n  expect_snapshot(solve(c.deviation))\n\n  mm <- unique(model.matrix(~ cyl * am, data = mtcars))\n  rownames(mm) <- c(\n    \"cyl4.am0\",\n    \"cyl4.am1\",\n    \"cyl6.am0\",\n    \"cyl6.am1\",\n    \"cyl8.am0\",\n    \"cyl8.am1\"\n  )\n\n  expect_snapshot(solve(mm))\n})\n"
  },
  {
    "path": "tests/testthat/test-convert_na_to.R",
    "content": "# numeric --------------------------\n\ntest_that(\"convert_na_to - numeric: works\", {\n  expect_identical(\n    convert_na_to(c(1, 2, 3, NA), replacement = 4),\n    as.double(1:4)\n  )\n\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(1, 2, 3, NA), replacement = NULL),\n      c(1, 2, 3, NA)\n    ),\n    \"needs to be a numeric\"\n  )\n})\n\ntest_that(\"convert_na_to - numeric: arg 'replacement' can only be numeric\", {\n  expect_warning(\n    convert_na_to(c(1, 2, 3, NA), replacement = \"a\"),\n    regexp = \"`replacement` needs to be a numeric vector.\"\n  )\n  expect_warning(\n    convert_na_to(c(1, 2, 3, NA), replacement = factor(8)),\n    regexp = \"`replacement` needs to be a numeric vector.\"\n  )\n})\n\ntest_that(\"convert_na_to - numeric: arg 'replacement' must be of length one\", {\n  expect_warning(\n    convert_na_to(c(1, 2, 3, NA), replacement = c(1, 2)),\n    regexp = \"`replacement` needs to be of length one.\"\n  )\n})\n\n\ntest_that(\"convert_na_to - numeric: returns original vector if 'replacement' not good\", {\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(1, 2, 3, NA), replacement = \"a\"),\n      c(1, 2, 3, NA)\n    ),\n    \"needs to be a numeric\"\n  )\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(1, 2, 3, NA), replacement = factor(8)),\n      c(1, 2, 3, NA)\n    ),\n    \"needs to be a numeric\"\n  )\n})\n\n\n# character --------------------------\n\ntest_that(\"convert_na_to - character: works\", {\n  expect_identical(\n    convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = \"d\"),\n    c(\"a\", \"b\", \"c\", \"d\")\n  )\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = NULL),\n      c(\"a\", \"b\", \"c\", NA)\n    ),\n    \"needs to be a character\"\n  )\n})\n\ntest_that(\"convert_na_to - character: arg 'replacement' can only be character\", {\n  expect_warning(\n    convert_na_to(c(\"a\", \"b\", \"c\", NA), 
replacement = mtcars),\n    regexp = \"`replacement` needs to be a character or numeric vector.\"\n  )\n  expect_warning(\n    convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = factor(8)),\n    regexp = \"`replacement` needs to be a character or numeric vector.\"\n  )\n})\n\ntest_that(\"convert_na_to - numeric: arg 'replacement' must be of length one\", {\n  expect_warning(\n    convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = c(\"d\", \"e\")),\n    regexp = \"`replacement` needs to be of length one.\"\n  )\n})\n\ntest_that(\"convert_na_to - character: returns original vector if 'replacement' not good\", {\n  expect_identical(\n    convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = 1),\n    c(\"a\", \"b\", \"c\", 1)\n  )\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = mtcars),\n      c(\"a\", \"b\", \"c\", NA)\n    ),\n    \"needs to be a character or numeric vector\"\n  )\n  expect_warning(\n    expect_identical(\n      convert_na_to(c(\"a\", \"b\", \"c\", NA), replacement = factor(8)),\n      c(\"a\", \"b\", \"c\", NA)\n    ),\n    \"needs to be a character or numeric vector\"\n  )\n})\n\n\n# factor --------------------------\n\ntest_that(\"convert_na_to - factor: works when 'replacement' is numeric \", {\n  x <- convert_na_to(factor(c(1, 2, 3, NA)), replacement = 4)\n  expect_identical(\n    x,\n    factor(1:4)\n  )\n  expect_identical(levels(x), as.character(1:4))\n  expect_warning(\n    expect_identical(\n      convert_na_to(factor(c(1, 2, 3, NA)), replacement = NULL),\n      factor(c(1, 2, 3, NA))\n    ),\n    \"needs to be of length one\"\n  )\n})\n\ntest_that(\"convert_na_to - factor: works when 'replacement' is character\", {\n  x <- convert_na_to(factor(c(1, 2, 3, NA)), replacement = \"d\")\n  expect_identical(\n    x,\n    factor(c(1:3, \"d\"))\n  )\n  expect_identical(levels(x), as.character(c(1:3, \"d\")))\n})\n\n\n# data frame --------------------------\n\ntest <- data.frame(\n 
 x = c(1, 2, NA),\n  y = c(\"a\", \"b\", NA),\n  z = factor(c(\"a\", \"b\", NA)),\n  x2 = c(4, 5, NA),\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"convert_na_to - data frame: works with replace_* args\", {\n  expect_identical(\n    convert_na_to(test, replace_num = 4, replace_char = \"e\", replace_fac = 8),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", \"8\"), levels = c(\"a\", \"b\", \"8\")),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\ntest_that(\"convert_na_to - data frame: only modifies numeric if only numeric specified\", {\n  expect_identical(\n    convert_na_to(test, replace_num = 4),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\ntest_that(\"convert_na_to - data frame: only modifies character if only character specified\", {\n  expect_identical(\n    convert_na_to(test, replace_char = \"e\"),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\ntest_that(\"convert_na_to - data frame: only modifies factor if only factor specified\", {\n  expect_identical(\n    convert_na_to(test, replace_fac = 8),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", \"8\"), levels = c(\"a\", \"b\", \"8\")),\n      x2 = c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\ntest_that(\"convert_na_to - data frame: arg 'select' works\", {\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = \"x\"\n    ),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, NA),\n      
stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = ~x\n    ),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = starts_with(\"x\")\n    ),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = ends_with(\"2\")\n    ),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = contains(\"x\")\n    ),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = 1:3\n    ),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", \"8\"), levels = c(\"a\", \"b\", \"8\")),\n      x2 = c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = regex(\"2$\")\n    ),\n    
data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\n\ntest_that(\"convert_na_to - data frame: arg 'exclude' works\", {\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      exclude = \"x\"\n    ),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", \"8\"), levels = c(\"a\", \"b\", \"8\")),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      exclude = ~x\n    ),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", \"8\"), levels = c(\"a\", \"b\", \"8\")),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(\n      test,\n      replace_num = 4,\n      replace_char = \"e\",\n      replace_fac = 8,\n      select = starts_with(\"x\"),\n      exclude = ~x\n    ),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\ntest_that(\"convert_na_to - data frame: works when arg 'select' is a list\", {\n  # numeric\n  expect_identical(\n    convert_na_to(test, replace_num = 4, select = list(x = 0)),\n    data.frame(\n      x = c(1, 2, 0),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  # character\n  expect_identical(\n    convert_na_to(test, replace_char = \"e\", select = list(y = \"d\")),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", \"d\"),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = 
c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  # only named list can override replace_*\n  expect_identical(\n    convert_na_to(test, replace_num = 4, select = list(0)),\n    data.frame(\n      x = c(1, 2, 4),\n      y = c(\"a\", \"b\", NA),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  expect_identical(\n    convert_na_to(test, replace_char = \"e\", select = list(\"d\")),\n    data.frame(\n      x = c(1, 2, NA),\n      y = c(\"a\", \"b\", \"e\"),\n      z = factor(c(\"a\", \"b\", NA)),\n      x2 = c(4, 5, NA),\n      stringsAsFactors = FALSE\n    )\n  )\n\n  # no problem if put a variable that doesn't exist in list\n  expect_warning(\n    expect_identical(\n      convert_na_to(test, replace_num = 4, select = list(x = 0, foo = 5)),\n      data.frame(\n        x = c(1, 2, 0),\n        y = c(\"a\", \"b\", NA),\n        z = factor(c(\"a\", \"b\", NA)),\n        x2 = c(4, 5, 4),\n        stringsAsFactors = FALSE\n      )\n    ),\n    \"not found\"\n  )\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_rename preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- convert_na_to(out, replace_num = 5)\n  a2 <- attributes(out2)\n\n  expect_identical(names(a1)[1:28], names(a2)[1:28])\n})\n\n# select helpers ------------------------------\ntest_that(\"convert_na_to regex\", {\n  expect_identical(\n    convert_na_to(airquality, replacement = 0, select = \"zone\", regex = TRUE),\n    convert_na_to(airquality, replacement = 0, select = \"Ozone\")\n  )\n  expect_identical(\n    convert_na_to(airquality, replacement = 0, select = \"zone$\", regex = TRUE),\n    convert_na_to(airquality, replacement = 0, select = \"Ozone\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-convert_to_na.R",
    "content": "data(iris)\n\ntest_that(\"convert_to_na-factor\", {\n  x <- convert_to_na(iris$Species, na = \"versicolor\")\n  expect_identical(sum(is.na(x)), 50L)\n\n  x <- convert_to_na(iris$Species, na = list(2, \"versicolor\"))\n  expect_identical(sum(is.na(x)), 50L)\n\n  x <- convert_to_na(\n    iris$Species,\n    na = list(2, \"versicolor\"),\n    drop_levels = FALSE\n  )\n  expect_identical(levels(x), c(\"setosa\", \"versicolor\", \"virginica\"))\n  expect_identical(as.vector(table(x)), c(50L, 0L, 50L))\n\n  x <- convert_to_na(\n    iris$Species,\n    na = list(2, \"versicolor\"),\n    drop_levels = TRUE\n  )\n  expect_identical(levels(x), c(\"setosa\", \"virginica\"))\n  expect_identical(as.vector(table(x)), c(50L, 50L))\n\n  expect_message(\n    x <- convert_to_na(iris$Species, na = 2), # nolint\n    \"for a factor or character variable\"\n  )\n  expect_identical(sum(is.na(x)), 0L)\n})\n\ntest_that(\"convert_to_na-numeric\", {\n  x <- convert_to_na(iris$Sepal.Length, na = 5)\n  expect_identical(sum(is.na(x)), sum(iris$Sepal.Length == 5))\n\n  x <- convert_to_na(iris$Sepal.Length, na = list(5, \"versicolor\"))\n  expect_identical(sum(is.na(x)), 10L)\n\n  x <- convert_to_na(iris$Sepal.Width, na = \"a\", verbose = FALSE)\n  expect_message(\n    convert_to_na(iris$Sepal.Width, na = \"a\"),\n    \"needs to be a numeric vector\"\n  )\n  expect_identical(sum(is.na(x)), 0L)\n})\n\ntest_that(\"convert_to_na-df\", {\n  expect_message(\n    x <- convert_to_na(iris, na = 5), # nolint\n    \"needs to be a character vector\"\n  )\n  expect_identical(\n    sum(is.na(x)),\n    sum(vapply(iris, function(i) sum(i == 5), FUN.VALUE = integer(1L)))\n  )\n\n  x <- convert_to_na(iris, na = list(5, \"versicolor\"))\n  expect_identical(sum(is.na(x)), 64L)\n\n  data(iris)\n  expect_message(\n    x <- convert_to_na(iris, na = 3), # nolint\n    \"needs to be a character vector\"\n  )\n  expect_identical(\n    sum(is.na(x)),\n    sum(vapply(\n      iris,\n      function(i) {\n        
if (is.numeric(i)) {\n          sum(i == 3)\n        } else {\n          0L\n        }\n      },\n      FUN.VALUE = integer(1L)\n    ))\n  )\n\n  x <- convert_to_na(iris, na = list(3, \"3\"))\n  expect_identical(sum(is.na(x)), 27L)\n})\n\n\ntest_that(\"convert_to_na other classes\", {\n  d <- data.frame(\n    a = 1:5,\n    b = factor(letters[1:5]),\n    c = as.Date(c(\n      \"2022-03-22\",\n      \"2022-01-02\",\n      \"2022-02-02\",\n      \"2021-04-02\",\n      \"2020-01-19\"\n    )),\n    d = c(TRUE, TRUE, FALSE, FALSE, TRUE),\n    e = as.complex(1:5)\n  )\n\n  x <- convert_to_na(d$a, na = 3)\n  expect_equal(x, c(1, 2, NA, 4, 5), tolerance = 1e-3, ignore_attr = TRUE)\n  expect_message(\n    x <- convert_to_na(d$a, na = \"c\"),\n    \"needs to be a numeric vector\"\n  ) # nolint\n  expect_equal(x, 1:5, tolerance = 1e-3, ignore_attr = TRUE)\n\n  x <- convert_to_na(d$b, na = \"c\")\n  expect_equal(\n    x,\n    structure(\n      c(1L, 2L, NA, 4L, 5L),\n      .Label = c(\"a\", \"b\", \"c\", \"d\", \"e\"),\n      class = \"factor\"\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  x <- convert_to_na(d$b, na = \"c\", drop_levels = TRUE)\n  expect_equal(\n    x,\n    structure(\n      c(1L, 2L, NA, 3L, 4L),\n      .Label = c(\"a\", \"b\", \"d\", \"e\"),\n      class = \"factor\"\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  expect_message(\n    convert_to_na(d$c, na = \"2022-03-22\"),\n    \"of class 'Date'\"\n  )\n  x <- convert_to_na(d$c, na = as.Date(\"2022-03-22\"))\n  expect_equal(\n    x,\n    structure(c(NA, 18994, 19025, 18719, 18280), class = \"Date\"),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  x <- convert_to_na(d$d, na = TRUE)\n  expect_equal(\n    x,\n    c(NA, NA, FALSE, FALSE, NA),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_message(\n    x <- convert_to_na(d$e, na = as.complex(4)), # nolint\n    \"variables of class `complex`\"\n  )\n  expect_equal(x, d$e, tolerance = 1e-3, ignore_attr 
= TRUE)\n\n  out <- data.frame(\n    a = c(1, 2, NA, 4, 5),\n    b = factor(c(\"a\", \"b\", NA, \"d\", \"e\"), levels = letters[1:5]),\n    c = as.Date(c(\n      \"2022-03-22\",\n      \"2022-01-02\",\n      \"2022-02-02\",\n      \"2021-04-02\",\n      \"2020-01-19\"\n    )),\n    d = c(NA, NA, FALSE, FALSE, NA),\n    e = as.complex(1:5)\n  )\n  convert_to_na(d, na = list(3, \"c\", TRUE, \"2022-01-02\"), verbose = FALSE)\n  x <- convert_to_na(\n    d,\n    na = list(3, \"c\", TRUE, as.Date(\"2022-01-02\")),\n    verbose = FALSE\n  )\n  expect_equal(x, out, ignore_attr = TRUE, tolerance = 1e-3)\n})\n\n# select helpers ------------------------------\ntest_that(\"convert_to_na regex\", {\n  expect_identical(\n    convert_to_na(mtcars, na = 4, select = \"arb\", regex = TRUE),\n    convert_to_na(mtcars, na = 4, select = \"carb\")\n  )\n  expect_identical(\n    convert_to_na(mtcars, na = 4, select = \"arb$\", regex = TRUE),\n    convert_to_na(mtcars, na = 4, select = \"carb\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_addprefix.R",
    "content": "test_that(\"data_addprefix works as expected\", {\n  expect_equal(\n    names(head(data_addprefix(iris, \"NEW_\"))),\n    c(\n      \"NEW_Sepal.Length\",\n      \"NEW_Sepal.Width\",\n      \"NEW_Petal.Length\",\n      \"NEW_Petal.Width\",\n      \"NEW_Species\"\n    )\n  )\n\n  expect_equal(\n    names(head(data_addsuffix(iris, \"_OLD\"))),\n    c(\n      \"Sepal.Length_OLD\",\n      \"Sepal.Width_OLD\",\n      \"Petal.Length_OLD\",\n      \"Petal.Width_OLD\",\n      \"Species_OLD\"\n    )\n  )\n\n  expect_equal(\n    names(head(data_addprefix(iris, \"NEW_\", select = starts_with(\"Sepal\")))),\n    c(\n      \"NEW_Sepal.Length\",\n      \"NEW_Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\"\n    )\n  )\n\n  expect_equal(\n    names(head(data_addsuffix(iris, \"_OLD\", select = starts_with(\"Petal\")))),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length_OLD\",\n      \"Petal.Width_OLD\",\n      \"Species\"\n    )\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"data_addprefix regex\", {\n  expect_equal(\n    data_addsuffix(mtcars, \"_regex\", select = \"pg\", regex = TRUE),\n    data_addsuffix(mtcars, \"_regex\", select = \"mpg\")\n  )\n  expect_equal(\n    data_addsuffix(mtcars, select = \"pg$\", \"_regex\", regex = TRUE),\n    data_addsuffix(mtcars, select = \"mpg\", \"_regex\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_arrange.R",
    "content": "df <- head(mtcars)\ndf$character <- c(\"a\", \"b\", \"b\", \"c\", \"c\", \"a\")\n\ntest_that(\"data_arrange works with one numeric column\", {\n  skip_if_not_installed(\"poorman\")\n  expect_identical(\n    poorman::arrange(df, carb),\n    data_arrange(df, \"carb\")\n  )\n  expect_identical(\n    poorman::arrange(df, -carb),\n    data_arrange(df, \"-carb\")\n  )\n})\n\ntest_that(\"data_arrange works with one character column\", {\n  skip_if_not_installed(\"poorman\")\n  expect_identical(\n    poorman::arrange(df, character),\n    data_arrange(df, \"character\")\n  )\n  expect_identical(\n    poorman::arrange(df, desc(character)),\n    data_arrange(df, \"-character\")\n  )\n})\n\ntest_that(\"data_arrange works with several columns\", {\n  skip_if_not_installed(\"poorman\")\n  expect_identical(\n    poorman::arrange(df, carb, gear),\n    data_arrange(df, c(\"carb\", \"gear\"))\n  )\n  expect_identical(\n    poorman::arrange(df, -carb, gear),\n    data_arrange(df, c(\"-carb\", \"gear\"))\n  )\n  expect_identical(\n    poorman::arrange(df, -carb, desc(character)),\n    data_arrange(df, c(\"-carb\", \"-character\"))\n  )\n})\n\ntest_that(\"data_arrange works without columns\", {\n  expect_identical(data_arrange(df), df)\n})\n\ntest_that(\"data_arrange ignores wrong names if safe = TRUE\", {\n  expect_warning(\n    expect_identical(data_arrange(df, \"foo\"), df),\n    regexp = \"don't exist\"\n  )\n\n  expect_warning(\n    expect_identical(\n      data_arrange(df, c(\"gear\", \"foo\")),\n      data_arrange(df, \"gear\")\n    ),\n    regexp = \"don't exist\"\n  )\n})\n\ntest_that(\"data_arrange errors if safe = FALSE\", {\n  expect_error(data_arrange(df, \"foo\", safe = FALSE))\n})\n\ntest_that(\"data_arrange errors if not coercable to data frame\", {\n  expect_error(data_arrange(list(a = 1:5, b = letters[1:3]), select = \"b\"))\n  expect_equal(\n    data_arrange(list(a = 1:5, b = letters[5:1]), select = \"b\"),\n    structure(\n      list(a = 5:1, b = 
c(\"a\", \"b\", \"c\", \"d\", \"e\")),\n      row.names = 5:1,\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"data_arrange works with grouped df\", {\n  set.seed(123)\n  x <- mtcars[\n    sample(seq_len(nrow(mtcars)), 10, replace = TRUE),\n    c(\"cyl\", \"mpg\")\n  ]\n  g <- data_group(x, cyl)\n\n  expected <- data.frame(\n    cyl = c(4, 4, 4, 6, 6, 8, 8, 8, 8, 8),\n    mpg = c(22.8, 30.4, 32.4, 17.8, 19.2, 10.4, 15, 15.2, 15.5, 18.7)\n  )\n  class(expected) <- c(\"grouped_df\", \"data.frame\")\n  rownames(expected) <- c(\n    \"Datsun 710\",\n    \"Honda Civic\",\n    \"Fiat 128\",\n    \"Merc 280C\",\n    \"Merc 280\",\n    \"Cadillac Fleetwood\",\n    \"Maserati Bora\",\n    \"Merc 450SLC\",\n    \"Dodge Challenger\",\n    \"Hornet Sportabout\"\n  )\n  attributes(expected)$groups <- attributes(g)$groups\n\n  expect_identical(\n    data_arrange(g, \"mpg\"),\n    expected,\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"data_arrange works with NA\", {\n  # without groups\n\n  tmp <- data.frame(\n    a = c(1, 2, 2, 8, 1, 3),\n    b = c(1, NA, 3, 3, NA, 5)\n  )\n\n  expect_identical(\n    data_arrange(tmp, \"a\"),\n    data.frame(\n      a = c(1, 1, 2, 2, 3, 8),\n      b = c(1, NA, NA, 3, 5, 3)\n    )\n  )\n\n  # with groups\n\n  g <- data_group(tmp, \"b\")\n\n  expected <- data.frame(\n    a = c(1, 2, 8, 3, 1, 2),\n    b = c(1, 3, 3, 5, NA, NA)\n  )\n  class(expected) <- c(\"grouped_df\", \"data.frame\")\n  attributes(expected)$groups <- attributes(g)$groups\n\n  expect_identical(\n    data_arrange(g, \"a\"),\n    expected,\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"data_arrange works one-column data frames (and does not drop dimensions)\", {\n  data(mtcars)\n  expect_s3_class(data_arrange(mtcars[\"gear\"], select = \"gear\"), \"data.frame\")\n  expect_s3_class(\n    data_arrange(mtcars[c(\"gear\", \"cyl\")], select = \"gear\"),\n    \"data.frame\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_codebook.R",
    "content": "data(efc)\ndata(iris)\n\ntest_that(\"data_codebook iris\", {\n  expect_snapshot(data_codebook(iris))\n})\n\n\ntest_that(\"data_codebook iris, reordered\", {\n  expect_snapshot(data_codebook(iris[c(1, 2, 5, 3, 4)]))\n})\n\n\ntest_that(\"data_codebook NaN and Inf\", {\n  d <- data.frame(\n    x = c(1, 4, NA, Inf, 4, NaN, 2, 1, 1)\n  )\n  expect_snapshot(data_codebook(d))\n\n  set.seed(123)\n  d <- data.frame(\n    x = c(sample.int(15, 100, TRUE), Inf, Inf)\n  )\n  expect_snapshot(data_codebook(d))\n  expect_snapshot(data_codebook(d, range_at = 100))\n  expect_snapshot(data_codebook(d, range_at = 100, max_values = 4))\n})\n\n\ntest_that(\"data_codebook, tinytable\", {\n  skip_if_not_installed(\"tinytable\")\n  d <- data.frame(\n    x = c(1, 4, NA, Inf, 4, NaN, 2, 1, 1)\n  )\n  expect_snapshot(display(data_codebook(d), format = \"tt\"))\n\n  set.seed(123)\n  d <- data.frame(\n    x = c(sample.int(15, 100, TRUE), Inf, Inf)\n  )\n  expect_snapshot(display(data_codebook(d), format = \"tt\"))\n  expect_snapshot(display(data_codebook(d, range_at = 100), format = \"tt\"))\n  expect_snapshot(display(\n    data_codebook(d, range_at = 100, max_values = 4),\n    format = \"tt\"\n  ))\n\n  data(iris)\n  expect_snapshot(display(data_codebook(iris[c(1, 2, 5, 3, 4)]), format = \"tt\"))\n})\n\n\ntest_that(\"data_codebook iris, select\", {\n  expect_snapshot(data_codebook(iris, select = starts_with(\"Sepal\")))\n})\n\n\ntest_that(\"data_codebook iris, select, ID\", {\n  expect_snapshot(data_codebook(iris, select = starts_with(\"Petal\")))\n})\n\n\ntest_that(\"data_codebook efc\", {\n  expect_snapshot(print(data_codebook(efc), table_width = Inf))\n  expect_snapshot(print(\n    data_codebook(efc),\n    table_width = \"auto\",\n    remove_duplicates = FALSE\n  ))\n  expect_snapshot(print(\n    data_codebook(efc),\n    table_width = \"auto\",\n    remove_duplicates = TRUE\n  ))\n})\n\n\ntest_that(\"data_codebook efc, variable_label_width\", {\n  out <- data_codebook(efc, 
variable_label_width = 30)\n  expect_snapshot(print(out, table_width = Inf))\n  expect_snapshot(print(out, table_width = \"auto\", remove_duplicates = FALSE))\n  expect_snapshot(print(out, table_width = \"auto\", remove_duplicates = TRUE))\n})\n\n\ntest_that(\"data_codebook efc, value_label_width\", {\n  out <- data_codebook(efc, variable_label_width = 30, value_label_width = 15)\n  expect_snapshot(print(out, table_width = Inf))\n  expect_snapshot(print(out, table_width = \"auto\", remove_duplicates = FALSE))\n  expect_snapshot(print(out, table_width = \"auto\", remove_duplicates = TRUE))\n})\n\n\ntest_that(\"data_codebook truncated data\", {\n  set.seed(123)\n  d <- data.frame(\n    a = sample.int(15, 100, TRUE),\n    b = sample(letters[1:18], 100, TRUE),\n    stringsAsFactors = FALSE\n  )\n  expect_snapshot(data_codebook(d, max_values = 5))\n})\n\n\ntest_that(\"data_codebook mixed numeric lengths\", {\n  set.seed(123)\n  d <- data.frame(\n    a = sample.int(4, 100, TRUE),\n    b = sample(5:15, 100, TRUE),\n    stringsAsFactors = FALSE\n  )\n  expect_snapshot(data_codebook(d))\n})\n\ntest_that(\"data_codebook mixed range_at\", {\n  set.seed(123)\n  d <- data.frame(\n    a = sample.int(4, 100, TRUE),\n    b = sample(5:15, 100, TRUE),\n    stringsAsFactors = FALSE\n  )\n  expect_snapshot(data_codebook(d, range_at = 3))\n})\n\n\ntest_that(\"data_codebook logicals\", {\n  set.seed(123)\n  d <- data.frame(\n    a = sample.int(15, 100, TRUE),\n    b = sample(letters[1:3], 100, TRUE),\n    c = sample(c(TRUE, FALSE), 100, TRUE),\n    stringsAsFactors = FALSE\n  )\n  expect_snapshot(data_codebook(d))\n})\n\n\ntest_that(\"data_codebook labelled data exceptions\", {\n  set.seed(123)\n\n  f1 <- sample.int(5, 100, TRUE)\n  f1[f1 == 4] <- NA\n  attr(f1, \"labels\") <- setNames(1:5, c(\"One\", \"Two\", \"Three\", \"Four\", \"Five\"))\n\n  f2 <- sample.int(5, 100, TRUE)\n  attr(f2, \"labels\") <- setNames(c(1:3, 5), c(\"One\", \"Two\", \"Three\", \"Five\"))\n\n  f3 <- 
sample.int(5, 100, TRUE)\n  attr(f3, \"labels\") <- setNames(1:5, c(\"One\", \"Two\", \"Three\", \"Four\", \"Five\"))\n\n  d <- data.frame(f1, f2, f3)\n  expect_snapshot(data_codebook(d))\n})\n\n\ntest_that(\"data_codebook labelled data factors\", {\n  set.seed(123)\n\n  f1 <- factor(sample(c(\"c\", \"b\", \"a\"), 100, TRUE))\n  attr(f1, \"labels\") <- setNames(c(\"c\", \"b\", \"a\"), c(\"Cee\", \"Bee\", \"A\"))\n\n  f2 <- factor(sample(c(\"a\", \"b\", \"c\"), 100, TRUE))\n  attr(f2, \"labels\") <- setNames(c(\"c\", \"b\", \"a\"), c(\"Cee\", \"Bee\", \"A\"))\n\n  f3 <- factor(sample(c(\"c\", \"b\", \"a\"), 100, TRUE))\n  attr(f3, \"labels\") <- setNames(c(\"a\", \"c\", \"b\"), c(\"A\", \"Cee\", \"Bee\"))\n\n  d <- data.frame(f1, f2, f3)\n  expect_snapshot(data_codebook(d))\n})\n\n\ntest_that(\"data_codebook works with numbers < 1\", {\n  d <- data.frame(\n    a = c(1, 1, 2, 2, 3, 3),\n    b = c(0, 0, 0, 1, 1, 2)\n  )\n  expect_snapshot(data_codebook(d))\n})\n\n\ntest_that(\"data_codebook, big marks\", {\n  set.seed(123)\n  f1 <- factor(sample(c(\"c\", \"b\", \"a\"), 1e6, TRUE))\n  f2 <- factor(sample.int(3, 1e6, TRUE))\n  d <- data.frame(f1, f2)\n  expect_snapshot(data_codebook(d))\n})\n\n\ntest_that(\"data_codebook, tagged NA\", {\n  skip_if_not_installed(\"haven\")\n  x <- haven::labelled(\n    x = c(\n      1:3,\n      haven::tagged_na(\"a\", \"c\", \"z\"),\n      4:1,\n      haven::tagged_na(\"a\", \"a\", \"c\"),\n      1:3,\n      haven::tagged_na(\"z\", \"c\", \"c\"),\n      1:4,\n      haven::tagged_na(\"a\", \"c\", \"z\")\n    ),\n    labels = c(\n      Agreement = 1,\n      Disagreement = 4,\n      First = haven::tagged_na(\"c\"),\n      Refused = haven::tagged_na(\"a\"),\n      `Not home` = haven::tagged_na(\"z\")\n    )\n  )\n  expect_snapshot(data_codebook(data.frame(x)))\n\n  x <- haven::labelled(\n    x = c(\n      1:3,\n      haven::tagged_na(\"a\", \"c\"),\n      4:1,\n      haven::tagged_na(\"a\", \"a\", \"c\"),\n      1:3,\n      
haven::tagged_na(\"c\", \"c\"),\n      1:4,\n      haven::tagged_na(\"a\", \"c\")\n    ),\n    labels = c(\n      Agreement = 1,\n      Disagreement = 4,\n      First = haven::tagged_na(\"c\"),\n      Refused = haven::tagged_na(\"a\"),\n      `Not home` = haven::tagged_na(\"z\")\n    )\n  )\n  expect_snapshot(data_codebook(data.frame(x)))\n})\n\n\ntest_that(\"data_codebook, negative label values #334\", {\n  skip_if_not_installed(\"haven\")\n  x1 <- haven::labelled(\n    x = 1:4,\n    labels = c(Agreement = 1, Disagreement = 4, Missing = -9)\n  )\n  x2 <- haven::labelled(\n    x = c(1:3, -9),\n    labels = c(Agreement = 1, Disagreement = 4, Missing = -9)\n  )\n  expect_snapshot(data_codebook(data.frame(x1, x2)))\n})\n\n\ntest_that(\"data_codebook, informative warning if no match\", {\n  data(iris)\n  expect_warning(\n    data_codebook(iris, select = starts_with(\"abc\")),\n    regex = \"No column names that matched\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_duplicated.R",
    "content": "# Preparations\n\ndf1 <- data.frame(\n  id = c(1, 2, 3, 1, 3),\n  year = c(2022, 2022, 2022, 2022, 2000),\n  item1 = c(NA, 1, 1, 2, 3),\n  item2 = c(NA, 1, 1, 2, 3),\n  item3 = c(NA, 1, 1, 2, 3)\n)\n\nexpected1 <- data.frame(\n  Row = c(1, 4, 3, 5),\n  id = c(1, 1, 3, 3),\n  year = c(2022, 2022, 2022, 2000),\n  item1 = c(NA, 2, 1, 3),\n  item2 = c(NA, 2, 1, 3),\n  item3 = c(NA, 2, 1, 3),\n  count_na = c(3, 0, 0, 0)\n)\n\nexpected2 <- data.frame(\n  Row = c(1, 4),\n  id = c(1, 1),\n  year = c(2022, 2022),\n  item1 = c(NA, 2),\n  item2 = c(NA, 2),\n  item3 = c(NA, 2),\n  count_na = c(3, 0)\n)\n\n# Testing\n\ntest_that(\"data_duplicated basic\", {\n  x <- data_duplicated(df1, select = \"id\")\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected1\n  )\n})\n\ntest_that(\"data_duplicated unquoted\", {\n  x <- data_duplicated(df1, select = id)\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected1\n  )\n})\n\ntest_that(\"data_duplicated vector\", {\n  x <- data_duplicated(df1, select = 1)\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected1\n  )\n})\n\ntest_that(\"data_duplicated select-helper\", {\n  x <- data_duplicated(df1, select = contains(\"id\"))\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected1\n  )\n})\n\ntest_that(\"data_duplicated multiple IDs\", {\n  x <- data_duplicated(df1, select = c(\"id\", \"year\"))\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected2\n  )\n})\n\ntest_that(\"data_duplicated multiple IDs formula\", {\n  x <- data_duplicated(df1, select = ~ id + year)\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected2\n  )\n})\n\ntest_that(\"data_duplicated multiple IDs vector\", {\n  x <- data_duplicated(df1, select = 1:2)\n  rownames(x) <- NULL\n  expect_equal(\n    x,\n    expected2\n  )\n})\n\ntest_that(\"data_unique works with groups\", {\n  df <- data.frame(\n    g = c(1, 1, 2, 2),\n    x = c(1, 1, 2, 1)\n  )\n  df <- data_group(df, \"g\")\n\n  expected <- data.frame(\n    
Row = 1:2,\n    g = c(1, 1),\n    x = c(1, 1),\n    count_na = c(0, 0)\n  )\n  expected <- data_group(expected, \"g\")\n\n  expect_identical(data_duplicated(df, \"x\"), expected, ignore_attr = TRUE)\n})\n"
  },
  {
    "path": "tests/testthat/test-data_extract.R",
    "content": "data(efc)\n\ntest_that(\"data_extract works with select-length > 1\", {\n  # works with multiple selects\n  expect_s3_class(\n    data_extract(efc, select = c(\"e42dep\", \"c172code\")),\n    \"data.frame\"\n  )\n\n  # colnames properly set\n  expect_named(\n    data_extract(efc, select = c(\"e42dep\", \"c172code\")),\n    c(\"e42dep\", \"c172code\")\n  )\n\n  # properly extract vector, w/o naming\n  expect_identical(data_extract(efc, select = \"e42dep\"), efc$e42dep)\n\n  # properly extract vector, with naming\n  x <- data_extract(efc, select = \"e42dep\", name = \"c172code\")\n  expect_named(x, as.character(efc$c172code))\n})\n\n\ntest_that(\"data_extract works with select-helpers\", {\n  expect_identical(\n    data_extract(iris, starts_with(\"Sepal\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  expect_identical(\n    data_extract(iris, 1:3),\n    iris[1:3]\n  )\n\n  expect_identical(\n    data_extract(iris, \"Species\"),\n    iris$Species\n  )\n\n  expect_identical(\n    data_extract(iris, contains(\"Wid\")),\n    iris[c(\"Sepal.Width\", \"Petal.Width\")]\n  )\n\n  expect_identical(\n    data_extract(iris, Sepal.Width),\n    iris$Sepal.Width\n  )\n})\n\n\ntest_that(\"data_extract works with formulas\", {\n  expect_identical(\n    data_extract(iris, ~ Sepal.Width + Species),\n    iris[c(\"Sepal.Width\", \"Species\")]\n  )\n})\n\n\ntest_that(\"data_extract from other functions\", {\n  test_fun <- function(data, i) {\n    data_extract(data, select = i)\n  }\n  expect_identical(\n    test_fun(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n})\n\n\ntest_that(\"data_extract extract, pull\", {\n  expect_identical(\n    data_extract(iris, starts_with(\"Sepal\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  expect_identical(\n    data_extract(iris, starts_with(\"Sepal\"), extract = \"first\"),\n    iris$Sepal.Length\n  )\n\n  expect_identical(\n    data_extract(iris, 
starts_with(\"Sepal\"), extract = \"last\"),\n    iris$Sepal.Width\n  )\n\n  expect_identical(\n    data_extract(\n      iris,\n      starts_with(\"Sepal\"),\n      extract = \"last\",\n      as_data_frame = TRUE\n    ),\n    iris[\"Sepal.Width\"]\n  )\n\n  expect_identical(\n    colnames(data_extract(mtcars, contains(\"a\"))),\n    c(\"drat\", \"am\", \"gear\", \"carb\")\n  )\n\n  expect_identical(\n    colnames(data_extract(mtcars, contains(\"a\"), extract = \"odd\")),\n    c(\"drat\", \"gear\")\n  )\n\n  expect_identical(\n    colnames(data_extract(mtcars, contains(\"a\"), extract = \"even\")),\n    c(\"am\", \"carb\")\n  )\n\n  expect_identical(\n    colnames(data_extract(mtcars, cyl:drat)),\n    c(\"cyl\", \"disp\", \"hp\", \"drat\")\n  )\n\n  expect_error(colnames(data_extract(mtcars, Cyl:Drat)))\n  expect_identical(\n    colnames(data_extract(mtcars, Cyl:Drat, ignore_case = TRUE)),\n    c(\"cyl\", \"disp\", \"hp\", \"drat\")\n  )\n\n  expect_identical(\n    colnames(data_extract(iris, contains(\"Sep\"))),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  expect_null(colnames(data_extract(iris, contains(\"sep\"))))\n\n  expect_identical(\n    colnames(data_extract(iris, contains(\"sep\"), ignore_case = TRUE)),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  expect_identical(\n    colnames(data_extract(iris, c(1:2, 5))),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Species\")\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"data_extract regex\", {\n  expect_identical(\n    data_extract(mtcars, select = \"pg\", regex = TRUE),\n    data_extract(mtcars, select = \"mpg\")\n  )\n  expect_identical(\n    data_extract(mtcars, select = \"pg$\", regex = TRUE),\n    data_extract(mtcars, select = \"mpg\")\n  )\n})\n\n\ntest_that(\"data_extract: 'name' is numeric\", {\n  expect_identical(\n    data_extract(mtcars, \"gear\", 1),\n    data_extract(mtcars, \"gear\", \"mpg\")\n  )\n  expect_identical(\n    data_extract(mtcars, \"gear\", -2),\n    
data_extract(mtcars, \"gear\", \"gear\")\n  )\n  expect_identical(\n    data_extract(mtcars, \"gear\", 0),\n    data_extract(mtcars, \"gear\", \"row.names\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_group.R",
    "content": "data(efc)\n\ntest_that(\"data_group attributes\", {\n  x <- data_group(efc, \"c172code\")\n  expect_identical(\n    attributes(x)$groups,\n    structure(\n      list(\n        c172code = c(1, 2, 3, NA),\n        .rows = list(\n          c(3L, 14L, 30L, 32L, 36L, 77L, 91L, 99L),\n          c(\n            1L,\n            2L,\n            4L,\n            5L,\n            6L,\n            7L,\n            8L,\n            10L,\n            11L,\n            12L,\n            16L,\n            17L,\n            18L,\n            21L,\n            22L,\n            23L,\n            24L,\n            25L,\n            26L,\n            28L,\n            29L,\n            31L,\n            33L,\n            34L,\n            35L,\n            37L,\n            38L,\n            39L,\n            40L,\n            42L,\n            44L,\n            45L,\n            46L,\n            47L,\n            50L,\n            51L,\n            52L,\n            53L,\n            54L,\n            56L,\n            57L,\n            59L,\n            60L,\n            62L,\n            65L,\n            68L,\n            69L,\n            71L,\n            72L,\n            73L,\n            76L,\n            78L,\n            80L,\n            81L,\n            82L,\n            83L,\n            84L,\n            85L,\n            86L,\n            87L,\n            88L,\n            90L,\n            92L,\n            93L,\n            96L,\n            100L\n          ),\n          c(\n            13L,\n            15L,\n            19L,\n            20L,\n            27L,\n            41L,\n            43L,\n            55L,\n            58L,\n            64L,\n            66L,\n            67L,\n            74L,\n            75L,\n            79L,\n            89L\n          ),\n          c(9L, 48L, 49L, 61L, 63L, 70L, 94L, 95L, 97L, 98L)\n        )\n      ),\n      row.names = c(2L, 1L, 4L, 3L),\n      class = \"data.frame\",\n      .drop = TRUE\n    )\n 
 )\n  expect_s3_class(x, \"grouped_df\")\n})\n\n\ntest_that(\"data_group attributes\", {\n  skip_if_not_installed(\"poorman\")\n  x <- data_group(efc, \"c172code\")\n  out <- poorman::summarise(x, mw = mean(c12hour, na.rm = TRUE))\n  expect_equal(out$mw, c(87.125, 94.046875, 75), tolerance = 1e-3)\n})\n\n# select helpers ------------------------------\ntest_that(\"data_group regex\", {\n  expect_identical(\n    attributes(data_group(mtcars, select = \"yl\", regex = TRUE))$groups[[1]],\n    sort(unique(mtcars$cyl))\n  )\n})\n\n\ntest_that(\"data_ungroup works\", {\n  x <- data_group(efc, \"c172code\")\n  attr(x, \"foo\") <- TRUE\n\n  ungrouped <- data_ungroup(x)\n  expect_false(inherits(ungrouped, \"grouped_df\"))\n  expect_true(attributes(x)$foo)\n})\n"
  },
  {
    "path": "tests/testthat/test-data_match.R",
    "content": "data(efc, package = \"datawizard\")\n\ntest_that(\"data_match works as expected\", {\n  matching_rows <- data_match(\n    mtcars,\n    data.frame(vs = 0, am = 1),\n    return_indices = TRUE\n  )\n  df1 <- mtcars[matching_rows, ]\n  expect_identical(unique(df1$vs), 0)\n  expect_identical(unique(df1$am), 1)\n\n  matching_rows <- data_match(\n    mtcars,\n    data.frame(vs = 0, am = c(0, 1)),\n    return_indices = TRUE\n  )\n  df2 <- mtcars[matching_rows, ]\n  expect_identical(unique(df2$vs), 0)\n  expect_identical(unique(df2$am), c(1, 0))\n})\n\n\ntest_that(\"data_match works with missing data\", {\n  skip_if_not_installed(\"poorman\")\n\n  # \"OR\" works\n  x1 <- length(data_match(\n    efc,\n    data.frame(c172code = 1, e16sex = 2),\n    match = \"or\",\n    return_indices = TRUE\n  ))\n  x2 <- nrow(poorman::filter(efc, c172code == 1 | e16sex == 2))\n  expect_identical(x1, x2)\n\n  # \"AND\" works\n  x1 <- length(data_match(\n    efc,\n    data.frame(c172code = 1, e16sex = 2),\n    match = \"and\",\n    return_indices = TRUE\n  ))\n  x2 <- nrow(poorman::filter(efc, c172code == 1, e16sex == 2))\n  expect_identical(x1, x2)\n\n  # \"NOT\" works\n  x1 <- length(data_match(\n    efc,\n    data.frame(c172code = 1, e16sex = 2),\n    match = \"not\",\n    return_indices = TRUE\n  ))\n  x2 <- nrow(poorman::filter(efc, c172code != 1, e16sex != 2))\n  expect_identical(x1, x2)\n\n  # remove NA\n  x1 <- length(data_match(\n    efc,\n    data.frame(c172code = 1, e16sex = 2),\n    match = \"not\",\n    return_indices = TRUE,\n    remove_na = FALSE\n  ))\n  expect_identical(x1, 41L)\n  x1 <- length(data_match(\n    efc,\n    data.frame(c172code = 1, e16sex = 2),\n    match = \"not\",\n    return_indices = TRUE,\n    remove_na = TRUE\n  ))\n  expect_identical(x1, 36L)\n})\n\n\ntest_that(\"data_match and data_filter work similar\", {\n  out1 <- data_match(mtcars, data.frame(vs = 0, am = 1), match = \"not\")\n  out2 <- data_filter(mtcars, vs != 0 & am != 1)\n  
expect_equal(out1, out2, ignore_attr = TRUE)\n\n  # using a data frame re-orders rows!\n  out1 <- data_match(mtcars, data.frame(vs = 0, am = 1), match = \"or\")\n  out2 <- data_filter(mtcars, vs == 0 | am == 1)\n  expect_equal(\n    out1[order(out1$vs, out1$am), ],\n    out2[order(out2$vs, out2$am), ],\n    ignore_attr = TRUE\n  )\n\n  # string representation is working\n  out1 <- data_match(mtcars, data.frame(vs = 0, am = 1), match = \"or\")\n  out2 <- data_filter(mtcars, \"vs == 0 | am == 1\")\n  expect_equal(\n    out1[order(out1$vs, out1$am), ],\n    out2[order(out2$vs, out2$am), ],\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"data_filter works\", {\n  out1 <- data_match(mtcars, data.frame(vs = 0, am = 1), match = \"not\")\n  out2 <- data_filter(mtcars, vs != 0 & am != 1)\n  out3 <- subset(mtcars, vs != 0 & am != 1)\n  out4 <- data_filter(mtcars, vs != 0, am != 1)\n  expect_equal(out1, out2, ignore_attr = TRUE)\n  expect_equal(out1, out3, ignore_attr = TRUE)\n  expect_equal(out2, out4, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_filter works with string representation\", {\n  out1 <- data_match(mtcars, data.frame(vs = 0, am = 1), match = \"not\")\n  out2 <- data_filter(mtcars, \"vs != 0 & am != 1\")\n  out3 <- subset(mtcars, vs != 0 & am != 1)\n  out4 <- data_filter(mtcars, c(\"vs != 0\", \"am != 1\"))\n  expect_equal(out1, out2, ignore_attr = TRUE)\n  expect_equal(out1, out3, ignore_attr = TRUE)\n  expect_equal(out2, out3, ignore_attr = TRUE)\n  expect_equal(out2, out4, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_filter works like slice\", {\n  out <- data_filter(mtcars, 5:10)\n  expect_equal(out, mtcars[5:10, ], ignore_attr = TRUE)\n  out <- data_filter(mtcars, \"5:10\")\n  expect_equal(out, mtcars[5:10, ], ignore_attr = TRUE)\n  slc <- 5:10\n  out <- data_filter(mtcars, slc)\n  expect_equal(out, mtcars[5:10, ], ignore_attr = TRUE)\n  slc <- \"5:10\"\n  out <- data_filter(mtcars, slc)\n  expect_equal(out, mtcars[5:10, ], ignore_attr = 
TRUE)\n})\n\n\ntest_that(\"data_filter gives informative message on errors\", {\n  expect_error(\n    data_filter(mtcars, mpg = 10),\n    \"`==`\"\n  )\n  expect_error(\n    data_filter(mtcars, \"mpg > 10 || cyl = 4\"),\n    \"`==`\"\n  )\n  expect_error(\n    data_filter(mtcars, mpg > 10 || cyl == 4),\n    \"`||`\"\n  )\n  expect_error(\n    data_filter(mtcars, mpg > 10 && cyl == 4),\n    \"`&&`\"\n  )\n  ## TODO: need to check why this fails on R 4.1\n  skip_if(getRversion() < \"4.2.0\")\n  expect_error(\n    data_filter(mtcars, mpg > 10?cyl == 4),\n    \"syntax\"\n  )\n  expect_error(\n    data_filter(mtcars, mgp > 10?cyl == 4),\n    \"Variable \\\"mgp\\\"\"\n  )\n})\n\n\ntest_that(\"data_filter gives informative message on errors\", {\n  data(mtcars)\n  expect_error(\n    data_filter(mtcars, cxl == 6),\n    regex = \"Variable \\\"cxl\\\"\"\n  )\n  expect_error(\n    data_filter(mtcars, \"cxl == 6\"),\n    regex = \"Variable \\\"cxl\\\"\"\n  )\n})\n\n\ntest_that(\"data_filter works with >= or <=\", {\n  expect_identical(\n    data_filter(mtcars, \"mpg >= 30.4\"),\n    subset(mtcars, mpg >= 30.4)\n  )\n  expect_identical(\n    data_filter(mtcars, mpg >= 30.4),\n    subset(mtcars, mpg >= 30.4)\n  )\n  expect_identical(\n    data_filter(mtcars, \"mpg <= 30.4\"),\n    subset(mtcars, mpg <= 30.4)\n  )\n  expect_identical(\n    data_filter(mtcars, mpg <= 30.4),\n    subset(mtcars, mpg <= 30.4)\n  )\n  mpgl30 <- \"mpg <= 30.4\"\n  expect_identical(\n    data_filter(mtcars, mpgl30),\n    subset(mtcars, mpg <= 30.4)\n  )\n\n  expect_identical(\n    data_filter(mtcars, \"mpg >= 30.4 & hp == 66\"),\n    subset(mtcars, mpg >= 30.4 & hp == 66)\n  )\n  expect_identical(\n    data_filter(mtcars, mpg <= 30.4 & hp == 66),\n    subset(mtcars, mpg <= 30.4 & hp == 66)\n  )\n  mpgl30hp66 <- \"mpg >= 30.4 & hp == 66\"\n  expect_identical(\n    data_filter(mtcars, mpgl30hp66),\n    subset(mtcars, mpg >= 30.4 & hp == 66)\n  )\n})\n\n\ntest_that(\"programming with data_filter\", {\n  # 
One arg ------------\n\n  foo <- function(var) {\n    data_filter(mtcars, var)\n  }\n  expect_identical(\n    foo(\"mpg >= 30\"),\n    data_filter(mtcars, \"mpg >= 30\")\n  )\n\n  foo2 <- function(data) {\n    var2 <- \"mpg >= 30\"\n    data_filter(data, var2)\n  }\n  expect_identical(\n    foo2(mtcars),\n    data_filter(mtcars, \"mpg >= 30\")\n  )\n\n  foo3 <- function(data) {\n    var <- \"mpg >= 30\"\n    data_filter(data, var)\n  }\n  expect_identical(\n    foo3(mtcars),\n    data_filter(mtcars, \"mpg >= 30\")\n  )\n\n  # Two args -----------\n\n  foo4 <- function(data, var3) {\n    data_filter(data, var3)\n  }\n  expect_identical(\n    foo4(mtcars, \"mpg >= 30 & hp <= 66\"),\n    data_filter(mtcars, \"mpg >= 30 & hp <= 66\")\n  )\n})\n\n\ntest_that(\"programming with data_filter with variables\", {\n  var4 <- \"mpg >= 30 & hp <= 66\"\n  expect_identical(\n    data_filter(mtcars, var4),\n    data_filter(mtcars, \"mpg >= 30 & hp <= 66\")\n  )\n  var <- \"mpg >= 30 & hp <= 66\"\n  expect_identical(\n    data_filter(mtcars, var),\n    data_filter(mtcars, \"mpg >= 30 & hp <= 66\")\n  )\n})\n\n\ntest_that(\"data_filter works with groups\", {\n  test <- data.frame(\n    id = c(1, 1, 2, 2),\n    x = c(0, 1, 3, 4),\n    y = c(1, 2, 3, 4)\n  )\n  test <- data_group(test, \"id\")\n\n  expected <- data.frame(id = c(1, 2), x = c(0, 3), y = c(1, 3))\n  class(expected) <- c(\"grouped_df\", \"data.frame\")\n  attributes(expected)$groups <- attributes(test)$groups\n\n  expect_equal(\n    data_filter(test, x == min(x)),\n    expected,\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"data_filter programming works with groups\", {\n  test <- data.frame(\n    id = c(1, 1, 2, 2),\n    x = c(0, 1, 3, 4),\n    y = c(1, 2, 3, 4)\n  )\n  test <- data_group(test, \"id\")\n\n  expected <- data.frame(id = c(1, 2), x = c(0, 3), y = c(1, 3))\n  class(expected) <- c(\"grouped_df\", \"data.frame\")\n  attributes(expected)$groups <- attributes(test)$groups\n\n  expect_equal(\n    
data_filter(test, \"x == min(x)\"),\n    expected,\n    ignore_attr = TRUE\n  )\n\n  foo_gr1 <- function(data, var) {\n    data_filter(data, var)\n  }\n  out <- foo_gr1(test, \"x == min(x)\")\n  expect_equal(out, expected, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_filter with groups, different ways of dots\", {\n  grp <- data_group(mtcars, \"cyl\")\n  fli <- \"mpg <= 20\"\n  out1 <- data_filter(grp, mpg <= 20)\n  out2 <- data_filter(grp, \"mpg <= 20\")\n  out3 <- data_filter(grp, fli)\n  expect_identical(out1, out2)\n  expect_identical(out1, out3)\n})\n\n\ntest_that(\"data_filter, slicing works with functions\", {\n  d <- data.frame(\n    a = c(\"aa\", \"a1\", \"bb\", \"b1\", \"cc\", \"c1\"),\n    b = 1:6,\n    stringsAsFactors = FALSE\n  )\n\n  rows <- grep(\"^[A-Za-z][0-9]$\", x = d$a)\n  out1 <- data_filter(d, rows)\n  out2 <- data_filter(d, grep(\"^[A-Za-z][0-9]$\", x = d$a))\n\n  expect_identical(out1, out2)\n\n  out3 <- data_filter(iris, (Sepal.Width == 3.0) & (Species == \"setosa\"))\n  expect_identical(nrow(out3), 6L)\n\n  # styler: off\n  expect_error(\n    data_filter(iris, (Sepal.Width = 3.0) & (Species = \"setosa\")), # nolint\n    regex = \"Filtering did not work\"\n  )\n  # styler: on\n})\n\n\ntest_that(\"data_filter works with tibbles\", {\n  skip_if_not_installed(\"tibble\")\n  skip_if_not_installed(\"dplyr\")\n  data(mtcars)\n\n  # preserve class\n  d <- tibble::as_tibble(mtcars)\n  out <- data_filter(d, mpg > 15)\n  expect_s3_class(out, \"tbl_df\")\n\n  # preserve attributes\n  d <- tibble::as_tibble(mtcars)\n  d <- dplyr::group_by(d, cyl)\n  out <- data_filter(d, mpg > 15)\n  expect_s3_class(out, \"tbl_df\")\n  expect_named(attr(out, \"groups\"), c(\"cyl\", \".rows\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_merge.R",
    "content": "data(mtcars)\nx <- mtcars[3:5, 1:3]\ny <- mtcars[30:32, c(1, 4:5)]\nz <- mtcars[11:13, 6:8]\n\nx$id <- 1:3\ny$id <- 2:4\nz$id <- 3:5\n\n# left -----------------------\n\ntest_that(\"left-join\", {\n  skip_if_not_installed(\"poorman\")\n\n  out <- data_merge(x, y, join = \"left\")\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", \"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(3L, 6L))\n  expect_identical(out, suppressMessages(poorman::left_join(x, y)))\n\n  out <- data_merge(x, y, join = \"left\", by = \"id\")\n  expect_identical(\n    colnames(out),\n    c(\"cyl\", \"disp\", \"id\", \"hp\", \"drat\", \"mpg.x\", \"mpg.y\")\n  )\n  expect_identical(out$disp, poorman::left_join(x, y, by = \"id\")$disp)\n  expect_identical(dim(out), c(3L, 7L))\n\n  out <- data_merge(x, y, join = \"left\", by = \"mpg\")\n  expect_identical(\n    colnames(out),\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"id.x\", \"id.y\")\n  )\n  expect_identical(out$disp, poorman::left_join(x, y, by = \"mpg\")$disp)\n  expect_identical(out$mpg, poorman::left_join(x, y, by = \"mpg\")$mpg)\n  expect_identical(dim(out), c(3L, 7L))\n})\n\n\n# semi/anti -----------------------\n\n# errors\ntest_that(\"semi-anti-join\", {\n  expect_error(data_merge(x, y, join = \"semi\"))\n  expect_error(data_merge(x, y, join = \"anti\"))\n})\n\n\n# right -----------------------\n\ntest_that(\"right-join\", {\n  skip_if_not_installed(\"poorman\")\n\n  out <- data_merge(x, y, join = \"right\")\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", \"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(3L, 6L))\n  # in data_merge(), we keep sorting from x, so do some preparation here\n  poor_out <- suppressMessages(poorman::right_join(x, y))\n  poor_out <- poor_out[order(poor_out$id), ]\n  row.names(poor_out) <- seq_len(nrow(poor_out))\n  expect_identical(out, poor_out)\n\n  out <- data_merge(x, y, join = \"right\", by = \"id\")\n  expect_identical(\n    
colnames(out),\n    c(\"cyl\", \"disp\", \"id\", \"hp\", \"drat\", \"mpg.x\", \"mpg.y\")\n  )\n  # in data_merge(), we keep sorting from x, so do some preparation here\n  poor_out <- suppressMessages(poorman::right_join(x, y, by = \"id\"))\n  poor_out <- poor_out[order(poor_out$id), ]\n  expect_identical(out$disp, poor_out$disp)\n  expect_identical(dim(out), c(3L, 7L))\n\n  out <- data_merge(x, y, join = \"right\", by = \"mpg\")\n  expect_identical(\n    colnames(out),\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"id.x\", \"id.y\")\n  )\n  # in data_merge(), we keep sorting from x, so do some preparation here\n  poor_out <- suppressMessages(poorman::right_join(x, y, by = \"mpg\"))\n  poor_out <- poor_out[order(poor_out$id.y, decreasing = TRUE), ]\n  out <- out[order(out$id.y, decreasing = TRUE), ]\n  expect_identical(out$disp, poor_out$disp)\n  expect_identical(out$mpg, poor_out$mpg)\n  expect_identical(dim(out), c(3L, 7L))\n})\n\n\n# inner -----------------------\n\ntest_that(\"inner-join\", {\n  skip_if_not_installed(\"poorman\")\n\n  out <- data_merge(x, y, join = \"inner\")\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", \"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(0L, 6L))\n\n  out <- data_merge(x, y, join = \"inner\", by = \"id\")\n  expect_identical(\n    colnames(out),\n    c(\"cyl\", \"disp\", \"id\", \"hp\", \"drat\", \"mpg.x\", \"mpg.y\")\n  )\n  expect_identical(out$disp, poorman::inner_join(x, y, by = \"id\")$disp)\n  expect_identical(dim(out), c(2L, 7L))\n\n  out <- data_merge(x, y, join = \"inner\", by = \"mpg\")\n  expect_identical(\n    colnames(out),\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"id.x\", \"id.y\")\n  )\n  expect_identical(out$disp, poorman::inner_join(x, y, by = \"mpg\")$disp)\n  expect_identical(dim(out), c(1L, 7L))\n})\n\n\n# full -----------------------\n\ntest_that(\"full-join\", {\n  out <- data_merge(x, y, join = \"full\")\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", 
\"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(6L, 6L))\n  expect_identical(\n    out$mpg,\n    c(22.8, 21.4, 18.7, 19.7, 15, 21.4),\n    tolerance = 1e-2\n  )\n  expect_identical(out$id, c(1, 2, 3, 2, 3, 4), tolerance = 1e-2)\n\n  out <- data_merge(x, y, join = \"full\", by = \"id\")\n  expect_identical(\n    colnames(out),\n    c(\"cyl\", \"disp\", \"id\", \"hp\", \"drat\", \"mpg.x\", \"mpg.y\")\n  )\n  expect_identical(dim(out), c(4L, 7L))\n  expect_identical(out$mpg.x, c(22.8, 21.4, 18.7, NA), tolerance = 1e-2)\n  expect_identical(out$id, 1:4, tolerance = 1e-2)\n\n  out <- data_merge(x, y, join = \"full\", by = \"mpg\")\n  expect_identical(\n    colnames(out),\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"id.x\", \"id.y\")\n  )\n  expect_identical(dim(out), c(5L, 7L))\n  expect_identical(out$mpg, c(22.8, 21.4, 18.7, 19.7, 15), tolerance = 1e-2)\n  expect_identical(out$id.x, c(1, 2, 3, NA, NA), tolerance = 1e-2)\n\n  out <- data_merge(x, y, join = \"full\", by = c(\"id\", \"mpg\"))\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", \"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(6L, 6L))\n  expect_identical(\n    out$mpg,\n    c(22.8, 21.4, 18.7, 19.7, 15, 21.4),\n    tolerance = 1e-2\n  )\n  expect_identical(out$id, c(1, 2, 3, 2, 3, 4), tolerance = 1e-2)\n})\n\n\n# bind -----------------------\n\ntest_that(\"bind-join\", {\n  skip_if_not_installed(\"poorman\")\n\n  out <- data_merge(x, y, join = \"bind\")\n  poor_out <- poorman::bind_rows(x, y)\n  row.names(poor_out) <- seq_len(nrow(poor_out))\n  expect_identical(colnames(out), c(\"mpg\", \"cyl\", \"disp\", \"id\", \"hp\", \"drat\"))\n  expect_identical(dim(out), c(6L, 6L))\n  expect_identical(out, poor_out)\n\n  # by will be ignored\n  out <- data_merge(x, y, join = \"bind\", by = \"id\")\n  expect_identical(out, poor_out)\n\n  # by will be ignored\n  out <- data_merge(x, y, join = \"bind\", by = \"mpg\")\n  expect_identical(out, poor_out)\n\n  # by will 
be ignored\n  out <- data_merge(x, y, join = \"bind\", by = c(\"id\", \"mpg\"))\n  expect_identical(out, poor_out)\n\n  x <- mtcars[1, ]\n  y <- mtcars[2, ]\n  expect_warning(\n    {\n      out <- data_merge(x, y, join = \"bind\", id = \"mpg\")\n    },\n    regexp = \"already exists\"\n  )\n  expect_named(\n    out,\n    c(names(mtcars), \"mpg_1\")\n  )\n  expect_identical(out$mpg_1, c(1, 2))\n})\n\n# joins without common columns -----------------------\n\ntest_that(\"bind-join\", {\n  skip_if_not_installed(\"poorman\")\n\n  x2 <- mtcars[3:5, 1:3]\n  y2 <- mtcars[30:32, 4:6]\n\n  expect_warning(\n    data_merge(x2, y2, join = \"full\"),\n    \"Found no matching columns in the data frames.\"\n  )\n\n  expect_identical(\n    suppressWarnings(data_merge(x2, y2, join = \"full\")),\n    suppressMessages(poorman::full_join(x2, y2)),\n    ignore_attr = TRUE\n  )\n\n  expect_identical(\n    data_merge(x2, y2, join = \"bind\"),\n    poorman::bind_rows(x2, y2),\n    ignore_attr = TRUE\n  )\n})\n\n# joins without common columns -----------------------\n\ntest_that(\"compare bind and full joins\", {\n  x2 <- mtcars[3:5, 1:3]\n  y2 <- mtcars[30:32, 3:6]\n  expect_identical(\n    data_merge(x2, y2, join = \"full\"),\n    data_merge(x2, y2, join = \"bind\"),\n    ignore_attr = TRUE\n  )\n})\n\n# join data frames in a list -----------------------\n\ntest_that(\"join data frames in a list\", {\n  skip_if_not_installed(\"poorman\")\n\n  x <- mtcars[1:5, 1:3]\n  y <- mtcars[28:31, 3:5]\n  z <- mtcars[11:18, c(1, 3:4, 6:8)]\n  x$id <- 1:5\n  y$id <- 4:7\n  z$id <- 3:10\n\n  dat <- data_merge(list(x, y, z), by = \"id\", id = \"df\", join = \"bind\")\n\n  expect_identical(\n    remove_empty(subset(poorman::filter(dat, df == 1), select = -df)),\n    x,\n    ignore_attr = TRUE\n  )\n\n  expect_identical(\n    remove_empty(subset(poorman::filter(dat, df == 2), select = -c(df, id))),\n    subset(y, select = -id),\n    ignore_attr = TRUE\n  )\n\n  expect_identical(\n    
remove_empty(subset(poorman::filter(dat, df == 3), select = -c(df, id))),\n    subset(z, select = -id),\n    ignore_attr = TRUE\n  )\n\n  x <- mtcars[1, ]\n  y <- mtcars[2, ]\n  expect_warning(\n    {\n      out <- data_merge(list(x, y), join = \"bind\", id = \"mpg\")\n    },\n    regexp = \"already exists\"\n  )\n  expect_named(\n    out,\n    c(names(mtcars), \"mpg_1\")\n  )\n  expect_identical(out$mpg_1, c(1, 2))\n})\n\n\n# join empty data frames -----------------------\n\nx <- data.frame(x = character(), stringsAsFactors = FALSE)\ny <- data.frame(x = character(), stringsAsFactors = FALSE)\nz <- data.frame(y = character(), stringsAsFactors = FALSE)\n\ntest_that(\"join empty data frames\", {\n  expect_identical(dim(data_merge(x, y, join = \"left\")), c(0L, 1L))\n  expect_identical(dim(data_merge(x, y, join = \"full\")), c(0L, 1L))\n  expect_identical(dim(data_merge(x, y, join = \"right\")), c(0L, 1L))\n  expect_identical(dim(data_merge(x, y, join = \"bind\")), c(0L, 1L))\n  expect_identical(dim(data_merge(x, z, join = \"bind\")), c(0L, 2L))\n})\n\n# join when all \"by\" are not present ---------------------\n\ntest_that(\"join when all 'by' are not present\", {\n  x <- mtcars[, c(\"mpg\", \"drat\", \"cyl\", \"qsec\")]\n  y <- mtcars[, c(\"mpg\", \"hp\", \"cyl\", \"wt\")]\n\n  expect_error(\n    {\n      out <- data_merge(x, y, by = c(\"mpg\", \"drat\", \"qsec\"))\n    },\n    regexp = \"Not all columns\"\n  )\n})\n\n# no warning for tibble #404 ---------------------\n\ntest_that(\"no warning for tibble when checking if column exist\", {\n  skip_if_not_installed(\"tibble\")\n  d_tibble <- tibble::as_tibble(iris)\n  expect_silent(data_merge(d_tibble, d_tibble[20:30, ], join = \"bind\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_modify.R",
    "content": "## styler: off\n\ntest_that(\"data_modify works\", {\n  data(iris)\n  out <- data_modify(\n    iris,\n    Sepal_W_z = standardize(Sepal.Width),\n    Sepal_Wz_double = 2 * Sepal_W_z\n  )\n  expect_equal(\n    out$Sepal_W_z,\n    as.vector(scale(iris$Sepal.Width)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    out$Sepal_Wz_double,\n    2 * as.vector(scale(iris$Sepal.Width)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify works with strings\", {\n  data(iris)\n  out <- data_modify(\n    iris,\n    as_expr(\"Sepal_W_z = standardize(Sepal.Width)\")\n  )\n  expect_equal(\n    out$Sepal_W_z,\n    as.vector(scale(iris$Sepal.Width)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  out <- data_modify(\n    iris,\n    as_expr(c(\n      \"Sepal_W_z = standardize(Sepal.Width)\",\n      \"Sepal_Wz_double = 2 * Sepal_W_z\"\n    ))\n  )\n  expect_equal(\n    out$Sepal_Wz_double,\n    2 * as.vector(scale(iris$Sepal.Width)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify preserves labels\", {\n  data(efc)\n  out <- data_modify(\n    efc,\n    c12hour_c = center(c12hour),\n    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\n  )\n  expect_identical(\n    attributes(out$c12hour_c)$label,\n    attributes(efc$c12hour)$label\n  )\n  expect_identical(\n    attributes(out$c12hour_z)$label,\n    attributes(efc$c12hour)$label\n  )\n  out <- data_modify(\n    efc,\n    as_expr(c(\n      \"c12hour_c = center(c12hour)\",\n      \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\"\n    ))\n  )\n  expect_identical(\n    attributes(out$c12hour_c)$label,\n    attributes(efc$c12hour)$label\n  )\n  expect_identical(\n    attributes(out$c12hour_z)$label,\n    attributes(efc$c12hour)$label\n  )\n})\n\n\ntest_that(\"data_modify recycling works\", {\n  data(iris)\n  out <- data_modify(iris, x = 1)\n  expect_equal(out$x, rep(1, nrow(iris)), ignore_attr = TRUE)\n  out <- 
data_modify(iris, x = c(1, 2))\n  expect_equal(out$x, rep(c(1, 2), nrow(iris) / 2), ignore_attr = TRUE)\n  expect_error(data_modify(iris, x = 1:4), regex = \"same length\")\n  out <- data_modify(iris, x = \"a\")\n  expect_equal(out$x, rep(\"a\", nrow(iris)), ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_modify recycling works with grouped df\", {\n  data(iris)\n  d <- data_group(iris, \"Species\")\n  expect_silent(data_modify(d, x = 1, test = 1:2))\n})\n\n\ntest_that(\"data_modify expression in character vector-1\", {\n  data(iris)\n  x <- \"var_a = Sepal.Width\"\n  out <- data_modify(iris, as_expr(x))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\"\n    )\n  )\n})\n\n\ntest_that(\"data_modify expression in character vector-2\", {\n  data(iris)\n  foo <- function(data) {\n    y <- \"var_a = Sepal.Width\"\n    head(data_modify(data, as_expr(y)))\n  }\n  out <- foo(iris)\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\"\n    )\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n\n  foo2 <- function(data, z) {\n    head(data_modify(data, as_expr(z)))\n  }\n  out <- foo2(iris, \"var_a = Sepal.Width\")\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\"\n    )\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n})\n\n\ntest_that(\"data_modify expression in character vector-3\", {\n  data(iris)\n  aa <- \"2 * Sepal.Width\"\n  out <- data_modify(iris, new_var = as_expr(aa))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"new_var\"\n    )\n  )\n  expect_identical(out$new_var, 2 * out$Sepal.Width)\n\n  aa 
<- \"2 * Sepal.Width\"\n  out <- data_modify(iris, new_var = as_expr(aa))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"new_var\"\n    )\n  )\n  expect_identical(out$new_var, 2 * out$Sepal.Width)\n\n  foo_nv <- function(data, z) {\n    head(data_modify(data, new_var = as_expr(z)))\n  }\n  out <- foo_nv(iris, \"2 * Sepal.Width\")\n  expect_identical(\n    colnames(out),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"new_var\"\n    )\n  )\n  expect_identical(out$new_var, 2 * out$Sepal.Width)\n})\n\n\ntest_that(\"data_modify expression as character vector-4\", {\n  data(iris)\n  x <- \"var_a = Sepal.Width\"\n  y <- \"Sepal_Wz_double = 2 * var_a\"\n  out <- data_modify(iris, as_expr(c(x, y)))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\",\n      \"Sepal_Wz_double\"\n    )\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)\n\n  foo1 <- function(data) {\n    x1 <- \"var_a = Sepal.Width\"\n    y1 <- \"Sepal_Wz_double = 2 * var_a\"\n    combined <- c(x1, y1)\n    data_modify(iris, as_expr(combined))\n  }\n  out <- foo1(iris)\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\",\n      \"Sepal_Wz_double\"\n    )\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)\n\n  foo2 <- function(data, z3) {\n    data_modify(data, as_expr(z3))\n  }\n  out <- foo2(iris, c(\"var_a = Sepal.Width\", \"Sepal_Wz_double = 2 * var_a\"))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      
\"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"var_a\",\n      \"Sepal_Wz_double\"\n    )\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)\n\n  # works with separated strings\n  data(iris)\n  out <- data_modify(\n    iris,\n    as_expr(\"var_a = Sepal.Width\"),\n    as_expr(\"Sepal_Wz_double = 2 * var_a\")\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)\n\n  out <- data_modify(\n    iris,\n    as_expr(c(\"var_a = Sepal.Width\", \"Sepal_Wz_double = 2 * var_a\"))\n  )\n  expect_identical(out$var_a, out$Sepal.Width)\n  expect_identical(out$Sepal_Wz_double, 2 * out$Sepal.Width)\n})\n\n\ntest_that(\"data_modify works with function as expression\", {\n  data(iris)\n  out <- data_modify(iris, foo = grepl(\"a\", Species)) # nolint\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, foo = as_expr(\"grepl(\\\"a\\\", Species)\"))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, as_expr(\"foo = grepl(\\\"a\\\", Species)\"))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, foo = as_expr(\"grepl('a', Species)\"))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, as_expr(\"foo = grepl('a', Species)\"))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, foo = as_expr('grepl(\\'a\\', Species)')) # nolint\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, as_expr('foo = grepl(\\'a\\', Species)')) # nolint\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, foo = as_expr('grepl(\\\"a\\\", Species)'))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n  out <- data_modify(iris, as_expr('foo = grepl(\\\"a\\\", 
Species)'))\n  expect_identical(out$foo, rep(c(TRUE, FALSE, TRUE), each = 50))\n})\n\n\ntest_that(\"data_modify remove variables with NULL\", {\n  data(iris)\n  out <- data_modify(iris, PL_new = 2 * Petal.Length, Petal.Length = NULL)\n  expect_named(\n    out,\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Width\", \"Species\", \"PL_new\")\n  )\n  expect_identical(out$PL_new, 2 * iris$Petal.Length)\n\n  out <- data_modify(iris, as_expr(\"Species = NULL\"))\n  expect_named(\n    out,\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n})\n\n\ntest_that(\"data_modify works on grouped data\", {\n  data(efc)\n  grouped_efc <- data_group(efc, \"c172code\")\n  out <- data_modify(\n    grouped_efc,\n    c12hour_c = center(c12hour),\n    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n    c12hour_z2 = standardize(c12hour)\n  )\n  out2 <- lapply(by(efc[\"c12hour\"], efc$c172code, scale), as.vector)\n  expect_equal(\n    na.omit(out$c12hour_z2[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify works on grouped data, with character vectors\", {\n  data(efc)\n  grouped_efc <- data_group(efc, \"c172code\")\n  out <- data_modify(\n    grouped_efc,\n    as_expr(c(\n      \"c12hour_c = center(c12hour)\",\n      \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n      \"c12hour_z2 = standardize(c12hour)\"\n    ))\n  )\n  out2 <- lapply(by(efc[\"c12hour\"], efc$c172code, scale), as.vector)\n  expect_equal(\n    na.omit(out$c12hour_z2[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    na.omit(out$c12hour_z[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify works on grouped data, preserves attributes and labels\", {\n  data(efc)\n  grouped_efc <- data_group(efc, \"c172code\")\n  out <- data_modify(\n    grouped_efc,\n    c12hour_c = 
center(c12hour)\n  )\n  expect_identical(\n    attributes(out$c12hour)$label,\n    attributes(efc$c12hour)$label\n  )\n})\n\n\ntest_that(\"data_modify works on grouped data, inside functions\", {\n  data(efc)\n  foo4 <- function(data) {\n    data_modify(\n      data,\n      as_expr(c(\n        \"c12hour_c = center(c12hour)\",\n        \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n        \"c12hour_z2 = standardize(c12hour)\"\n      ))\n    )\n  }\n  out <- foo4(data_group(efc, \"c172code\"))\n  out2 <- lapply(by(efc[\"c12hour\"], efc$c172code, scale), as.vector)\n  expect_equal(\n    na.omit(out$c12hour_z2[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    na.omit(out$c12hour_z[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n\n  foo5 <- function(data, rec) {\n    data_modify(data, as_expr(rec))\n  }\n  out <- foo5(\n    data_group(efc, \"c172code\"),\n    c(\n      \"c12hour_c = center(c12hour)\",\n      \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n      \"c12hour_z2 = standardize(c12hour)\"\n    )\n  )\n  out2 <- lapply(by(efc[\"c12hour\"], efc$c172code, scale), as.vector)\n  expect_equal(\n    na.omit(out$c12hour_z2[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    na.omit(out$c12hour_z[out$c172code == 1]),\n    out2[[1]],\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify errors for non df\", {\n  expect_error(data_modify(\n    iris$Sepal.Length,\n    Sepal_W_z = standardize(Sepal.Width)\n  ))\n})\n\n\ntest_that(\"data_modify errors for empty data frames\", {\n  data(mtcars)\n  x <- mtcars[1, ]\n  expect_error(\n    data_modify(x[-1, ], new_var = 5),\n    regex = \"empty data frame\"\n  )\n})\n\n\ntest_that(\"data_modify errors for typos\", {\n  data(efc)\n  a <- \"center(c22hour)\" # <---------------- error in variable name\n  b <- \"c12hour_c / 
sd(c12hour, na.rm = TRUE)\"\n  expect_error(\n    data_modify(efc, c12hour_c = as_expr(a), c12hour_z = as_expr(b)),\n    regex = \"c22hour\"\n  )\n\n  a <- \"center(c12hour)\"\n  b <- \"c12hour_c / sd(c21hour, na.rm = TRUE)\" # <------ error in variable name\n  expect_error(\n    data_modify(efc, c12hour_c = as_expr(a), c12hour_z = as_expr(b)),\n    regex = \"c12hour_c\"\n  )\n\n  expect_error(\n    data_modify(efc, c12hour_c = as_expr(a), c12hour_z = as_expr(b)),\n    regex = \"second expression\"\n  )\n})\n\n\ntest_that(\"data_modify message about recycling values\", {\n  expect_snapshot(head(data_modify(iris, Sepal.Width = 1)))\n  expect_snapshot(head(data_modify(iris, Sepal.Width = 1:2)))\n  expect_snapshot(head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1)))\n  expect_snapshot(head(data_modify(iris, Petal.Length = 1, Sepal.Width = 1:2)))\n  expect_snapshot(head(data_modify(iris, Petal.Length = 2, Sepal.Width = 2)))\n})\n\n\ntest_that(\"data_modify message about modified variables\", {\n  expect_snapshot(head(data_modify(iris, Sepal.Width = 2 * Sepal.Width)))\n  expect_snapshot(head(data_modify(\n    iris,\n    Petal.Length = Sepal.Length,\n    Sepal.Width = Petal.Width\n  )))\n})\n\n\ntest_that(\"data_modify works with character variables, and inside functions\", {\n  data(efc)\n  a <- \"center(c12hour)\"\n  b <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\n  d <- \"standardize(c12hour)\"\n  out <- data_modify(\n    efc,\n    c12hour_c = as_expr(a),\n    c12hour_z = as_expr(b),\n    c12hour_z2 = as_expr(d)\n  )\n  expect_equal(\n    out$c12hour_z2,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    out$c12hour_z,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n\n  # when calling functions\n  a1 <- \"center(c12hour)\"\n  b1 <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\n  d1 <- \"standardize(c12hour)\"\n  foo <- function(data, x1, x2, x3) {\n    
data_modify(\n      efc,\n      c12hour_c = as_expr(x1),\n      c12hour_z = as_expr(x2),\n      c12hour_z2 = as_expr(x3)\n    )\n  }\n  out <- foo(efc, a1, b1, d1)\n  expect_equal(\n    out$c12hour_z2,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    out$c12hour_z,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n\n  # when calling functions, arguments inside function defined\n  foo2 <- function(data) {\n    a2 <- \"center(c12hour)\"\n    b2 <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\n    d2 <- \"standardize(c12hour)\"\n\n    data_modify(\n      efc,\n      c12hour_c = as_expr(a2),\n      c12hour_z = as_expr(b2),\n      c12hour_z2 = as_expr(d2)\n    )\n  }\n  out <- foo2(efc)\n  expect_equal(\n    out$c12hour_z2,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    out$c12hour_z,\n    as.vector(scale(efc$c12hour)),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify works with grouped df when overwriting existing variables\", {\n  data(iris)\n  iris_grp <- data_group(iris, \"Species\")\n  out <- data_modify(iris_grp, Sepal.Length = normalize(Sepal.Length))\n  expect_equal(\n    head(out$Sepal.Length),\n    c(0.53333, 0.4, 0.26667, 0.2, 0.46667, 0.73333),\n    tolerance = 1e-3\n  )\n\n  out <- data_modify(\n    iris_grp,\n    Sepal.Length = normalize(Sepal.Length),\n    Sepal.Length2 = 2 * Sepal.Length\n  )\n  expect_equal(\n    head(out$Sepal.Length2),\n    2 * c(0.53333, 0.4, 0.26667, 0.2, 0.46667, 0.73333),\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_modify works with functions that return character vectors\", {\n  data(iris)\n  set.seed(123)\n  out <- data_modify(iris, grp = sample(letters[1:3], nrow(iris), TRUE))\n  expect_identical(head(out$grp), c(\"a\", \"c\", \"b\", \"a\", \"c\", \"c\"))\n})\n\n\ntest_that(\"data_modify 1:n() and similar works in (grouped) 
data frames\", {\n  data(mtcars)\n  out <- data_modify(mtcars, Trials = 1:n()) # nolint\n  expect_identical(out$Trials, 1:32)\n  x <- data_group(mtcars, \"gear\")\n  out <- data_modify(x, Trials = 1:n()) # nolint\n  expect_identical(out$Trials[out$gear == 3], 1:15)\n  expect_identical(out$Trials[out$gear == 4], 1:12)\n  out <- data_modify(x, Trials = 3:(n() + 2))\n  expect_identical(out$Trials[out$gear == 3], 3:17)\n  expect_identical(out$Trials[out$gear == 4], 3:14)\n})\n\n\ntest_that(\"data_modify .if/.at arguments\", {\n  data(iris)\n  d <- iris[1:5, ]\n  # validate results\n  out <- data_modify(d, .at = \"Species\", .modify = as.numeric)\n  expect_identical(out$Species, c(1, 1, 1, 1, 1))\n  out <- data_modify(d, .if = is.factor, .modify = as.numeric)\n  expect_identical(out$Species, c(1, 1, 1, 1, 1))\n  out <- data_modify(\n    d,\n    new_length = Petal.Length * 2,\n    .at = \"Species\",\n    .modify = as.numeric\n  )\n  expect_identical(out$Species, c(1, 1, 1, 1, 1))\n  expect_named(\n    out,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"new_length\"\n    )\n  )\n  # using other functions with `.at`\n  out <- data_modify(\n    d,\n    .at = extract_column_names(d, select = starts_with(\"Sepal\")),\n    .modify = as.factor\n  )\n  expect_s3_class(out$Sepal.Length, \"factor\")\n  expect_s3_class(out$Sepal.Width, \"factor\")\n\n  # .at and .if cannot be used at same timne\n  expect_error(\n    data_modify(d, .at = \"Species\", .if = is.factor, .modify = as.numeric),\n    regex = \"You cannot use both\"\n  )\n  # modify must be a function\n  expect_error(\n    data_modify(d, .at = \"Species\", .modify = \"a\"),\n    regex = \"`.modify` must\"\n  )\n  # unknown variable\n  expect_error(\n    data_modify(d, .at = c(\"Species\", \"Test\"), .modify = as.numeric),\n    regex = \"Variable \\\"Test\\\"\"\n  )\n  # unknown variables\n  expect_error(\n    data_modify(d, .at = 
c(\"Species\", \"Hi\", \"Test\"), .modify = as.numeric),\n    regex = \"Variables \\\"Hi\\\" and \\\"Test\\\"\"\n  )\n  # one of .at or .if must be specified\n  expect_error(\n    data_modify(d, .modify = as.numeric),\n    regex = \"You need to specify\"\n  )\n  # function not applicable to factors\n  expect_error(\n    data_modify(d, .at = \"Species\", .modify = function(x) 2 / y + x),\n    regex = \"Error in modifying variable\"\n  )\n  # function not applicable to factors\n  expect_error(\n    data_modify(d, .at = \"Species\", .modify = function(x) 2 * x),\n    regex = \"Error in modifying variable\"\n  )\n  # .modify needs to be specified\n  expect_error(\n    data_modify(d, .at = \"Species\", .if = is.factor),\n    regex = \"You need to specify\"\n  )\n  # newly created variables are processed by if/at\n  out <- data_modify(\n    d,\n    new_length = Petal.Length * 2,\n    .if = is.numeric,\n    .modify = round\n  )\n  expect_equal(out$new_length, c(3, 3, 3, 3, 3), ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_modify works with new expressions, different use cases same results\", {\n  data(iris)\n  out1 <- data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Width\"))\n  out2 <- data_modify(iris, sepwid = as_expr(\"2 * Sepal.Width\"))\n  e <- \"sepwid = 2 * Sepal.Width\"\n  out3 <- data_modify(iris, as_expr(e))\n  e <- \"2 * Sepal.Width\"\n  out4 <- data_modify(iris, sepwid = as_expr(e))\n\n  expect_equal(head(out1), head(out2), ignore_attr = TRUE, tolerance = 1e-4)\n  expect_equal(head(out1), head(out3), ignore_attr = TRUE, tolerance = 1e-4)\n  expect_equal(head(out1), head(out4), ignore_attr = TRUE, tolerance = 1e-4)\n\n  out1b <- data_modify(\n    iris,\n    as_expr(c(\"sepwid = 2 * Sepal.Width\", \"seplen = 5 * Sepal.Length\"))\n  )\n  out2b <- data_modify(\n    iris,\n    sepwid = as_expr(\"2 * Sepal.Width\"),\n    seplen = as_expr(\"5 * Sepal.Length\")\n  )\n  e <- c(\"sepwid = 2 * Sepal.Width\", \"seplen = 5 * Sepal.Length\")\n  out3b <- data_modify(iris, 
as_expr(e))\n  e <- \"2 * Sepal.Width\"\n  out4b <- data_modify(iris, sepwid = as_expr(e), seplen = 5 * Sepal.Length)\n\n  expect_equal(head(out1b), head(out2b), ignore_attr = TRUE, tolerance = 1e-4)\n  expect_equal(head(out1b), head(out3b), ignore_attr = TRUE, tolerance = 1e-4)\n  expect_equal(head(out1b), head(out4b), ignore_attr = TRUE, tolerance = 1e-4)\n\n  # no expression\n  out <- data_modify(iris, sepwid = \"2 * Sepal.Widht\")\n  expect_identical(\n    head(out$sepwid),\n    c(\n      \"2 * Sepal.Widht\",\n      \"2 * Sepal.Widht\",\n      \"2 * Sepal.Widht\",\n      \"2 * Sepal.Widht\",\n      \"2 * Sepal.Widht\",\n      \"2 * Sepal.Widht\"\n    )\n  )\n\n  # works with paste()\n  to_standardize <- c(\"Petal.Length\", \"Sepal.Length\")\n  out <- data_modify(\n    iris,\n    as_expr(\n      paste0(to_standardize, \"_stand = standardize(\", to_standardize, \")\")\n    )\n  )\n  expect_equal(\n    head(out$Petal.Length_stand),\n    c(-1.33575, -1.33575, -1.3924, -1.2791, -1.33575, -1.16581),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Sepal.Length_stand),\n    c(-0.89767, -1.1392, -1.38073, -1.50149, -1.01844, -0.53538),\n    tolerance = 1e-3\n  )\n\n  # complex example\n  e <- \"2 * Sepal.Width\"\n  f <- \"half_petal = 0.5 * Petal.Length\"\n  a <- \"string\"\n  num <- 1:5\n  out_complex <- data_modify(\n    iris,\n    sepwid = as_expr(e),\n    seplen = 5 * Sepal.Length,\n    as_expr(f),\n    new_var = a,\n    new_num = num,\n    new_var2 = \"ho\",\n    new_num2 = 4:6,\n    Sepal.Length = NULL,\n    Petal.Length = NULL,\n    Sepal.Width = NULL,\n    Petal.Width = NULL\n  )\n  expect_snapshot(print(head(out_complex)))\n})\n\n\ntest_that(\"data_modify works with new expressions, grouped_df, different use cases same results\", {\n  data(efc, package = \"datawizard\")\n  grouped_efc <- data_group(efc, \"c172code\")\n  new_efc1 <- data_modify(\n    grouped_efc,\n    c12hour_c = center(c12hour),\n    c12hour_z = c12hour_c / sd(c12hour, na.rm = 
TRUE),\n    c12hour_z2 = standardize(c12hour),\n    id = 1:n() # nolint\n  )\n\n  new_efc2 <- data_modify(\n    grouped_efc,\n    as_expr(\"c12hour_c = center(c12hour)\"),\n    c12hour_z = as_expr(\"c12hour_c / sd(c12hour, na.rm = TRUE)\"),\n    c12hour_z2 = standardize(c12hour),\n    id = 1:n() # nolint\n  )\n  expect_equal(\n    head(new_efc1),\n    head(new_efc2),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n\n  s <- c(\n    \"c12hour_c = center(c12hour)\",\n    \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\",\n    \"c12hour_z2 = standardize(c12hour)\"\n  )\n  new_efc3 <- data_modify(\n    grouped_efc,\n    as_expr(s),\n    id = 1:n() # nolint\n  )\n  expect_equal(\n    head(new_efc1),\n    head(new_efc3),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n\n  new_efc4 <- data_modify(\n    grouped_efc,\n    c12hour_c = center(c12hour),\n    c12hour_z = as_expr(\"c12hour_c / sd(c12hour, na.rm = TRUE)\"),\n    c12hour_z2 = standardize(c12hour),\n    id = 1:n() # nolint\n  )\n  expect_equal(\n    head(new_efc1),\n    head(new_efc4),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n})\n\n\ntest_that(\"data_modify errors with new expressions\", {\n  e <- \"sepwid = 2 * Sepal.Widht\"\n  expect_error(\n    data_modify(iris, as_expr(e)),\n    regex = \"in the first expression\"\n  )\n  expect_error(\n    data_modify(iris, as_expr(e)),\n    regex = \"Sepal.Widht\"\n  )\n\n  expect_error(\n    data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Widht\")),\n    regex = \"in the first expression\"\n  )\n  expect_error(\n    data_modify(iris, as_expr(\"sepwid = 2 * Sepal.Widht\")),\n    regex = \"Sepal.Widht\"\n  )\n\n  expect_error(\n    data_modify(iris, sepwid = 2 * Sepal.Widht),\n    regex = \"in the first expression\"\n  )\n  expect_error(\n    data_modify(iris, sepwid = 2 * Sepal.Widht),\n    regex = \"Sepal.Widht\"\n  )\n\n  expect_error(\n    data_modify(iris, as_expr(\"2 * Sepal.Widht\")),\n    regex = \"variable name\"\n  )\n\n  e <- \"2 * 
Sepal.Widht\"\n  expect_error(\n    data_modify(iris, as_expr(e)),\n    regex = \"variable name\"\n  )\n\n  data(efc, package = \"datawizard\")\n  a <- \"center(c22hour)\" # <---------------- error in variable name\n  b <- \"c12hour_c / sd(c12hour, na.rm = TRUE)\"\n  expect_error(\n    data_modify(efc, c12hour_c = as_expr(a), c12hour_z = as_expr(b)),\n    regex = \"c22hour\"\n  )\n\n  expect_error(\n    data_modify(iris, a = as_expr(c(\"1 + 1\", \"2 + 2\"))),\n    regex = \"Could not evaluate expression\"\n  )\n})\n\n\nskip_if_not_installed(\"withr\")\n\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_modify 1:n() and similar works in (grouped) data frames inside function calls\", {\n    data(mtcars)\n    x <- data_group(mtcars, \"gear\")\n\n    foo <- function(d) {\n      out <- data_modify(d, Trials = 1:n()) # nolint\n      out$Trials\n    }\n    expect_identical(\n      foo(x),\n      c(\n        1L,\n        2L,\n        3L,\n        1L,\n        2L,\n        3L,\n        4L,\n        4L,\n        5L,\n        6L,\n        7L,\n        5L,\n        6L,\n        7L,\n        8L,\n        9L,\n        10L,\n        8L,\n        9L,\n        10L,\n        11L,\n        12L,\n        13L,\n        14L,\n        15L,\n        11L,\n        1L,\n        2L,\n        3L,\n        4L,\n        5L,\n        12L\n      )\n    )\n  })\n)\n\ntest_that(\"data_modify errors on non-defined function\", {\n  expect_error(data_modify(iris, Species = foo()))\n})\n\n\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_modify correctly assigns values from variables\", {\n    d <- data.frame()\n    for (param in letters[c(1, 2, 5)]) {\n      out <- data.frame(x = as.numeric(as.factor(param)))\n      out <- data_modify(out, Parameter = param)\n      d <- rbind(out, d)\n    }\n    expect_named(d, c(\"x\", \"Parameter\"))\n    expect_identical(d$Parameter, c(\"e\", \"b\", \"a\"))\n\n    d <- data.frame()\n    for (param in c(\"a 1\", \"b 2\")) {\n      out <- 
data.frame(x = as.numeric(as.factor(param)))\n      out <- data_modify(out, Parameter = param)\n      d <- rbind(out, d)\n    }\n    expect_named(d, c(\"x\", \"Parameter\"))\n    expect_identical(d$Parameter, c(\"b 2\", \"a 1\"))\n\n    # variable is not copied, values is used\n    a <- \"x\"\n    d <- data.frame(x = 1)\n    out <- data_modify(d, y = a)\n    expect_identical(out$y, \"x\")\n  })\n)\n\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_modify passes expression syntax to function\", {\n    foo1 <- function(data, ...) {\n      head(data_modify(data, ...))\n    }\n    out1 <- foo1(iris, SW_fraction = Sepal.Width / 10)\n    out2 <- foo1(iris, as_expr(\"SW_fraction = Sepal.Width / 10\"))\n    expect_identical(out1, out2)\n  })\n)\n\n## styler: on\n"
  },
  {
    "path": "tests/testthat/test-data_partition.R",
    "content": "test_that(\"data_partition works as expected\", {\n  # not supported\n\n  expect_error(\n    data_partition(new.env()),\n    \"`data` must be a data frame\"\n  )\n\n  # to be coerced to data frames\n\n  expect_snapshot(data_partition(letters, seed = 123))\n\n  # validation checks\n\n  expect_warning(\n    data_partition(iris, 0.7, row_id = \"Species\"),\n    \"exists\"\n  )\n\n  expect_warning(expect_warning(\n    data_partition(iris, c(0.7, 0.3), row_id = \"Species\"),\n    \"generated\"\n  ))\n\n  # values\n\n  out <- data_partition(mtcars, proportion = 0.8, seed = 123)\n\n  expect_identical(\n    out$p_0.8$.row_id,\n    c(\n      1L,\n      3L,\n      4L,\n      5L,\n      7L,\n      8L,\n      9L,\n      10L,\n      11L,\n      14L,\n      15L,\n      17L,\n      18L,\n      19L,\n      20L,\n      21L,\n      22L,\n      23L,\n      24L,\n      26L,\n      27L,\n      28L,\n      29L,\n      30L,\n      31L,\n      32L\n    )\n  )\n\n  expect_identical(\n    colnames(out$p_0.8),\n    c(\n      \"mpg\",\n      \"cyl\",\n      \"disp\",\n      \"hp\",\n      \"drat\",\n      \"wt\",\n      \"qsec\",\n      \"vs\",\n      \"am\",\n      \"gear\",\n      \"carb\",\n      \".row_id\"\n    )\n  )\n\n  expect_identical(\n    lapply(out, nrow),\n    list(p_0.8 = 26L, test = 6L)\n  )\n\n  # data frames\n\n  data(iris)\n  expect_snapshot(str(data_partition(iris, proportion = 0.7, seed = 123)))\n  expect_snapshot(str(data_partition(\n    iris,\n    proportion = c(0.2, 0.5),\n    seed = 123\n  )))\n  expect_snapshot(str(data_partition(\n    iris,\n    proportion = 0.7,\n    by = \"Species\",\n    seed = 123\n  )))\n  expect_snapshot(str(data_partition(\n    iris,\n    proportion = c(0.2, 0.5),\n    by = \"Species\",\n    seed = 123\n  )))\n})\n\ntest_that(\"data_partition warns if no testing set\", {\n  expect_warning(\n    data_partition(iris, proportion = 1),\n    \"sums up to 1\"\n  )\n  expect_warning(\n    data_partition(iris, proportion = c(0.5, 
0.5)),\n    \"sums up to 1\"\n  )\n})\n\ntest_that(\"data_partition errors if values in proportion not between 0 and 1\", {\n  expect_error(\n    data_partition(iris, proportion = 1.3),\n    \"cannot be higher\"\n  )\n  expect_error(\n    data_partition(iris, proportion = c(0.5, 0.6)),\n    \"cannot be higher\"\n  )\n  expect_error(\n    data_partition(iris, proportion = c(1.3, -1)),\n    \"cannot be negative\"\n  )\n  expect_error(\n    data_partition(iris, proportion = -1),\n    \"cannot be negative\"\n  )\n})\n\ntest_that(\"data_partition warns if row_id already exists\", {\n  iris2 <- iris\n\n  iris2[[\".row_id\"]] <- \"A\"\n  expect_warning(\n    data_partition(iris2, proportion = 0.5),\n    \"already exists\"\n  )\n\n  iris2[[\"foo\"]] <- \"A\"\n  expect_warning(\n    data_partition(iris2, proportion = 0.5, row_id = \"foo\"),\n    \"already exists\"\n  )\n\n  part1 <- data_partition(iris, proportion = 0.5, seed = 123)\n  part2 <- suppressWarnings(data_partition(iris2, proportion = 0.5, seed = 123))\n\n  expect_identical(\n    part1$p_0.5[1:5],\n    part2$p_0.5[1:5]\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_peek.R",
    "content": "test_that(\"data_peek works as expected\", {\n  out <- data_peek(iris)\n  expect_named(out, c(\"Variable\", \"Type\", \"Values\"))\n  expect_identical(\n    out$Variable,\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(dim(out), c(5L, 3L))\n})\n\ntest_that(\"data_peek works as expected with select\", {\n  out <- data_peek(iris, select = 2:4)\n  expect_named(out, c(\"Variable\", \"Type\", \"Values\"))\n  expect_identical(\n    out$Variable,\n    c(\"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n  expect_identical(dim(out), c(3L, 3L))\n})\n\ntest_that(\"data_peek works as expected with custom width\", {\n  out <- data_peek(iris, width = 130)\n  expect_named(out, c(\"Variable\", \"Type\", \"Values\"))\n  expect_identical(\n    out$Variable,\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(dim(out), c(5L, 3L))\n})\n\ntest_that(\"data_peek snapshots look as expected\", {\n  expect_snapshot(data_peek(iris))\n  expect_snapshot(data_peek(iris, select = 1:3))\n  expect_snapshot(data_peek(iris, width = 130))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_read.R",
    "content": "skip_if_not_installed(\"httr\")\nskip_if_not_installed(\"readxl\")\nskip_if_not_installed(\"haven\")\nskip_if_not_installed(\"readr\")\nskip_if_not_installed(\"data.table\")\nskip_if_not_installed(\"rio\")\n\nskip_on_cran()\n\nskip_if_not_installed(\"curl\")\nskip_if_offline()\n\n# csv -------------------------\n\ntest_that(\"data_read - csv\", {\n  d <- data_read(\n    \"https://raw.githubusercontent.com/easystats/circus/main/data/bootstrapped.csv\",\n    verbose = FALSE\n  )\n  expect_identical(dim(d), c(10000L, 4L))\n})\n\n\n# csv -------------------------\n\ntest_that(\"data_read, skip_empty\", {\n  d <- data_read(\n    \"https://raw.githubusercontent.com/easystats/circus/main/data/test_skip_empty.csv\",\n    verbose = FALSE\n  )\n  expect_identical(ncol(d), 3L)\n  expect_identical(colnames(d), c(\"Var1\", \"Var2\", \"Var3\"))\n})\n\n\n# tsv -------------------------\n\ntest_that(\"data_read - tsv\", {\n  skip_if_not_installed(\"withr\")\n\n  withr::with_tempfile(\"temp_file\", fileext = \".tsv\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/sample1.tsv\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    expect_identical(nrow(d), 3L)\n    expect_identical(colnames(d), c(\"a\", \"b\", \"c\"))\n    expect_identical(sum(vapply(d, is.numeric, FUN.VALUE = logical(1L))), 2L)\n    expect_identical(sum(vapply(d, is.character, FUN.VALUE = logical(1L))), 1L)\n  })\n})\n\n\n# excel -------------------------\n\ntest_that(\"data_read - excel\", {\n  skip_if_not_installed(\"withr\")\n\n  withr::with_tempfile(\"temp_file\", fileext = \".xlsx\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/sample1.xlsx\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- 
data_read(\n      temp_file,\n      verbose = FALSE\n    )\n\n    expect_identical(nrow(d), 3L)\n    expect_identical(colnames(d), c(\"a\", \"b\", \"c\"))\n    expect_identical(sum(vapply(d, is.numeric, FUN.VALUE = logical(1L))), 2L)\n    expect_identical(sum(vapply(d, is.character, FUN.VALUE = logical(1L))), 1L)\n  })\n})\n\n\n# Stata file -----------------------------------\n\ntest_that(\"data_read - Stata file\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".dta\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/stata_test.dta\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    expect_identical(\n      d,\n      data.frame(\n        mpg = c(21, 21, 22.8),\n        cyl = c(6, 6, 4),\n        disp = c(160, 160, 108)\n      )\n    )\n  })\n})\n\n\n# SAS file -----------------------------------\n\ntest_that(\"data_read - SAS file\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".sas7bdat\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/sas_test.sas7bdat\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    expect_identical(\n      d,\n      data.frame(\n        mpg = c(21, 21, 22.8),\n        cyl = c(6, 6, 4),\n        disp = c(160, 160, 108)\n      )\n    )\n  })\n})\n\n\n# RDS file, matrix, coercible -----------------------------------\n\ntest_that(\"data_read - RDS file, matrix, coercible\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".rds\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/matrix_object.rds\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    
expect_message({\n      d <- data_read(\n        temp_file,\n        verbose = TRUE\n      )\n    })\n\n    expect_s3_class(d, \"data.frame\")\n    expect_identical(dim(d), c(2L, 5L))\n  })\n})\n\n\n# RDS file, preserve class /types -----------------------------------\n\ntest_that(\"data_read - RDS file, preserve class\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".rds\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/hiv.rds\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(temp_file, verbose = FALSE)\n    expect_s3_class(d, \"data.frame\")\n    expect_identical(\n      sapply(d, class),\n      c(\n        village = \"integer\",\n        outcome = \"integer\",\n        distance = \"numeric\",\n        amount = \"numeric\",\n        incentive = \"integer\",\n        age = \"integer\",\n        hiv2004 = \"integer\",\n        agecat = \"factor\"\n      )\n    )\n  })\n})\n\n\n# RData -----------------------------------\n\ntest_that(\"data_read - no warning for RData\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".RData\", code = {\n    data(mtcars)\n    save(mtcars, file = temp_file)\n    expect_silent(data_read(temp_file, verbose = FALSE))\n  })\n})\n\n\ntest_that(\"data_read - message for multiple objects in RData\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".RData\", code = {\n    data(mtcars)\n    data(iris)\n    save(mtcars, iris, file = temp_file)\n    expect_message(\n      expect_message(\n        data_read(temp_file, verbose = TRUE),\n        regex = \"File contained more than one object\"\n      ),\n      \"Reading data\"\n    )\n  })\n})\n\n\n# SPSS file -----------------------------------\n\ntest_that(\"data_read - SPSS file\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".sav\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/EFC.sav\"\n    
)\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    expect_identical(sum(vapply(d, is.factor, FUN.VALUE = logical(1L))), 15L)\n    expect_identical(sum(vapply(d, is.numeric, FUN.VALUE = logical(1L))), 11L)\n    expect_identical(\n      levels(d$c172code),\n      c(\n        \"low level of education\",\n        \"intermediate level of education\",\n        \"high level of education\"\n      )\n    )\n    expect_identical(\n      attr(d$n4pstu, \"labels\"),\n      c(\n        `spouse/partner` = 1,\n        child = 2,\n        sibling = 3,\n        `daughter or son -in-law` = 4\n      )\n    )\n  })\n})\n\n\n# SPSS file 2 ---------------------------------\n\ntest_that(\"data_read - SPSS file 2\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".sav\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/spss_test.sav\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n\n    expect_identical(\n      d,\n      structure(\n        list(\n          V1 = structure(\n            1:4,\n            levels = c(\n              \"Eins\",\n              \"Zwei\",\n              \"Drei\",\n              \"Vier\"\n            ),\n            class = \"factor\",\n            converted_to_factor = TRUE,\n            label = \"Variable 1\"\n          ),\n          V2 = structure(\n            c(2, 3, 4, 1),\n            labels = c(\n              Eins = 1,\n              Zwei = 2,\n              Drei = 3\n            ),\n            label = \"Variable 2\"\n          ),\n          V3 = structure(\n            c(\n              3L,\n              2L,\n              1L,\n              4L\n            ),\n            levels = c(\"Eins\", \"Zwei\", \"Drei\", \"Vier\"),\n            class = 
\"factor\",\n            converted_to_factor = TRUE,\n            label = \"Variable 3\"\n          )\n        ),\n        row.names = c(NA, -4L),\n        class = \"data.frame\"\n      )\n    )\n  })\n})\n\n\n# zipped SPSS file -----------------------------------\n\ntest_that(\"data_read - zipped SPSS file\", {\n  withr::with_tempfile(\"temp_file\", fileext = \".zip\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/EFC.zip\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    expect_identical(sum(vapply(d, is.factor, FUN.VALUE = logical(1L))), 15L)\n    expect_identical(sum(vapply(d, is.numeric, FUN.VALUE = logical(1L))), 11L)\n\n    d <- data_read(\n      temp_file,\n      convert_factors = FALSE,\n      verbose = FALSE\n    )\n    expect_identical(sum(vapply(d, is.factor, FUN.VALUE = logical(1L))), 0L)\n    expect_identical(sum(vapply(d, is.numeric, FUN.VALUE = logical(1L))), 26L)\n  })\n})\n\n\n# SPSS file, many value labels  -----------------------------------\n\ntest_that(\"data_read, convert many labels correctly\", {\n  # Output validated against SPSS output from original dataset\n\n  withr::with_tempfile(\"temp_file\", fileext = \".sav\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/spss_many_labels.sav\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(\n      temp_file,\n      verbose = FALSE\n    )\n    # all are factors by default\n    expect_identical(\n      vapply(d, class, character(1)),\n      c(selv1 = \"factor\", c12 = \"factor\", c12a = \"factor\", c12c = \"factor\")\n    )\n    expect_identical(\n      levels(d$selv1),\n      c(\n        \"Vignette 1 weiblich (Gülsen E. Reinigungskraft B)\",\n        \"Vignette 2 weiblich (Gülsen E. 
Anwältin B)\",\n        \"Vignette 3 weiblich (Monika E. Reinigungskraft B)\",\n        \"Vignette 4 weiblich (Monika E. Anwältin B)\",\n        \"Vignette 5 männlich (Hasan E. Reinigungskraft B)\",\n        \"Vignette 6 männlich (Hasan E. Anwalt B)\",\n        \"Vignette 7 männlich (Martin E. Reinigungskraft B)\",\n        \"Vignette 8 männlich (Martin E. Anwalt B)\",\n        \"Vignette 9 weiblich (Gülsen E. Reinigungskraft E)\",\n        \"Vignette 10 weiblich (Gülsen E. Anwältin E)\",\n        \"Vignette 11 weiblich (Monika E. Reinigungskraft E)\",\n        \"Vignette 12 weiblich (Monika E. Anwältin E)\",\n        \"Vignette 13 männlich (Hasan E. Reinigungskraft E)\",\n        \"Vignette 14 männlich (Hasan E. Anwalt E)\",\n        \"Vignette 15 männlich (Martin E. Reinigungskraft E)\",\n        \"Vignette 16 männlich (Martin E. Anwalt E)\"\n      )\n    )\n    expect_snapshot(data_tabulate(d$selv1))\n\n    expect_identical(levels(d$c12), c(\"ja\", \"nein\", \"keine Angabe\"))\n    expect_snapshot(data_tabulate(d$c12))\n\n    expect_identical(levels(d$c12a), c(\"Filter\", \"ja\", \"nein\", \"keine Angabe\"))\n    expect_snapshot(data_tabulate(d$c12a))\n    expect_identical(\n      levels(d$c12c),\n      c(\n        \"Filter\",\n        \"0 = keine\",\n        \"1\",\n        \"2\",\n        \"3\",\n        \"4\",\n        \"5\",\n        \"6\",\n        \"7\",\n        \"8\",\n        \"9\",\n        \"10 = sehr starke\",\n        \"weiß nicht / keine Angabe\"\n      )\n    )\n    expect_snapshot(data_tabulate(d$c12c))\n\n    expect_message(\n      expect_message(\n        expect_message(\n          data_read(temp_file),\n          regexp = \"Reading\"\n        ),\n        regexp = \"Variables where all\"\n      ),\n      regexp = \"4 out of 4\"\n    )\n\n    d <- data_read(\n      temp_file,\n      convert_factors = FALSE,\n      verbose = FALSE\n    )\n    # all are factors by default\n    expect_identical(\n      vapply(d, class, character(1)),\n      c(selv1 
= \"numeric\", c12 = \"numeric\", c12a = \"numeric\", c12c = \"numeric\")\n    )\n    expect_snapshot(table(d$selv1))\n    expect_identical(\n      attributes(d$selv1)$labels,\n      c(\n        `Vignette 1 weiblich (Gülsen E. Reinigungskraft B)` = 1,\n        `Vignette 2 weiblich (Gülsen E. Anwältin B)` = 2,\n        `Vignette 3 weiblich (Monika E. Reinigungskraft B)` = 3,\n        `Vignette 4 weiblich (Monika E. Anwältin B)` = 4,\n        `Vignette 5 männlich (Hasan E. Reinigungskraft B)` = 5,\n        `Vignette 6 männlich (Hasan E. Anwalt B)` = 6,\n        `Vignette 7 männlich (Martin E. Reinigungskraft B)` = 7,\n        `Vignette 8 männlich (Martin E. Anwalt B)` = 8,\n        `Vignette 9 weiblich (Gülsen E. Reinigungskraft E)` = 9,\n        `Vignette 10 weiblich (Gülsen E. Anwältin E)` = 10,\n        `Vignette 11 weiblich (Monika E. Reinigungskraft E)` = 11,\n        `Vignette 12 weiblich (Monika E. Anwältin E)` = 12,\n        `Vignette 13 männlich (Hasan E. Reinigungskraft E)` = 13,\n        `Vignette 14 männlich (Hasan E. Anwalt E)` = 14,\n        `Vignette 15 männlich (Martin E. Reinigungskraft E)` = 15,\n        `Vignette 16 männlich (Martin E. 
Anwalt E)` = 16,\n        `99` = 99\n      )\n    )\n\n    expect_snapshot(table(d$c12))\n    expect_identical(\n      attributes(d$c12)$labels,\n      c(Filter = -2, ja = 1, nein = 2, `keine Angabe` = 99)\n    )\n\n    expect_snapshot(table(d$c12a))\n    expect_identical(\n      attributes(d$c12a)$labels,\n      c(Filter = -2, ja = 1, nein = 2, `keine Angabe` = 99)\n    )\n\n    expect_snapshot(table(d$c12c))\n    expect_identical(\n      attributes(d$c12c)$labels,\n      c(\n        Filter = -2,\n        `0 = keine` = 0,\n        `1` = 1,\n        `2` = 2,\n        `3` = 3,\n        `4` = 4,\n        `5` = 5,\n        `6` = 6,\n        `7` = 7,\n        `8` = 8,\n        `9` = 9,\n        `10 = sehr starke` = 10,\n        `weiß nicht / keine Angabe` = 99\n      )\n    )\n  })\n})\n\n\n# invalid file type -------------------------\n\ntest_that(\"data_read, no file extension\", {\n  expect_error(data_read(\"mytestfile\"), regex = \"extension\")\n  expect_error(data_read(NULL, regex = \"extension\"))\n})\n\n\n# file not exists -------------------------\n\ntest_that(\"data_read, file not exists\", {\n  expect_error(data_read(\"thisfileshouldnotexist.csv\"), regex = \"not exist\")\n  expect_error(\n    suppressMessages(data_read(\"thisfileshouldnotexist.sav\")),\n    regex = \"not exist\"\n  )\n})\n\n# RDS file, no data frame -----------------------------------\n\ntest_that(\"data_read - RDS file, no data frame\", {\n  skip_if_not_installed(\"withr\")\n\n  withr::with_tempfile(\"temp_file\", fileext = \".rda\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/list_for_testing.rda\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    expect_message(\n      expect_warning(\n        d <- data_read(temp_file, verbose = TRUE), # nolint\n        regex = \"no data frame\"\n      ),\n      \"Reading data\"\n    )\n    expect_type(d, \"list\")\n  
})\n})\n\ntest_that(\"data_read - RDA file, model object\", {\n  skip_if_not_installed(\"withr\")\n  skip_if_not_installed(\"brms\")\n\n  withr::with_tempfile(\"temp_file\", fileext = \".rds\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/model_object.rds\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    expect_message(\n      expect_message(\n        d <- data_read(temp_file, verbose = TRUE), # nolint\n        regex = \"Imported file is a regression\"\n      ),\n      \"Reading data\"\n    )\n    expect_s3_class(d, \"lm\")\n  })\n\n  withr::with_tempfile(\"temp_file\", fileext = \".rda\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/brms_1.rda\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    expect_message(\n      expect_message(\n        d <- data_read(temp_file, verbose = TRUE), # nolint\n        regex = \"Imported file is a regression\"\n      ),\n      \"Reading data\"\n    )\n    expect_s3_class(d, \"brmsfit\")\n  })\n})\n\n\ntest_that(\"data_read - RDS file, from URL\", {\n  # works with URL\n  request <- httr::GET(\n    \"https://raw.github.com/easystats/circus/main/data/model_object.rds\"\n  )\n  httr::stop_for_status(request)\n  expect_message(\n    expect_message(\n      d <- data_read(\n        # nolint\n        \"https://raw.github.com/easystats/circus/main/data/model_object.rds\",\n        verbose = TRUE\n      ),\n      regex = \"Imported file is a regression\"\n    ),\n    \"Reading data\"\n  )\n})\n\n\ntest_that(\"data_read - nanoparquet\", {\n  skip_if_not_installed(\"withr\")\n  skip_if_not_installed(\"nanoparquet\")\n\n  withr::with_tempfile(\"temp_file\", fileext = \".parquet\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/penguins.parquet\"\n    )\n    
httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- data_read(temp_file)\n    expect_named(\n      d,\n      c(\n        \"species\",\n        \"island\",\n        \"bill_len\",\n        \"bill_dep\",\n        \"flipper_len\",\n        \"body_mass\",\n        \"sex\",\n        \"year\"\n      )\n    )\n    expect_identical(dim(d), c(344L, 8L))\n  })\n})\n"
  },
  {
    "path": "tests/testthat/test-data_recode.R",
    "content": "# set recode pattern old=new --------------\n\noptions(data_recode_pattern = \"old=new\")\n\n\n# numeric -----------------------\n\nset.seed(123)\nx <- sample(c(1:4, NA), 15, TRUE)\n\ntest_that(\"recode numeric\", {\n  out <- recode_values(x, list(`1` = 0, `2:3` = 1, `4` = 2))\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(\n    x,\n    list(`1` = 0, `2:3` = 1, `4` = 2, `NA` = 9),\n    preserve_na = FALSE\n  )\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, 9, 2, 0, 1, 1, 9, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(`1` = 0, `2:3` = 1, `4` = 2, `NA` = 9))\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(\n    x,\n    list(`1` = 0, `2` = 1),\n    default = 99,\n    preserve_na = FALSE\n  )\n  expect_equal(\n    out,\n    c(99, 99, 1, 1, 99, 99, 99, 0, 1, 99, 99, 99, 99, 0, 99),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(`1` = 0, `2` = 1), default = 99)\n  expect_equal(\n    out,\n    c(99, 99, 1, 1, 99, NA, 99, 0, 1, 99, NA, 99, 99, 0, 99),\n    ignore_attr = TRUE\n  )\n})\n\n\n# Date -----------------------\n\nset.seed(123)\nx <- as.Date(\"2022-01-01\")\n\ntest_that(\"recode date\", {\n  expect_message(recode_values(x))\n})\n\n\n# factor -----------------------\n\nset.seed(123)\nx <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\n\ntest_that(\"recode factor\", {\n  out <- recode_values(x, list(a = \"x\", `b, c` = \"y\"))\n  expect_equal(\n    out,\n    structure(\n      c(\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L\n      ),\n      .Label = c(\"x\", \"y\"),\n      class = \"factor\"\n    ),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(a = \"x\", `b, c` = 
\"y\"))\n  expect_equal(\n    out,\n    structure(\n      c(\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L\n      ),\n      .Label = c(\"x\", \"y\"),\n      class = \"factor\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\nset.seed(123)\nx <- as.factor(sample(c(\"a\", \"b\", \"c\", NA_character_), 15, TRUE))\n\ntest_that(\"recode factor\", {\n  out <- recode_values(x, list(a = \"x\", `b, c` = \"y\"))\n  expect_equal(\n    as.character(out),\n    c(\"y\", \"y\", \"y\", \"y\", \"y\", \"y\", \"y\", \"y\", \"y\", \"x\", NA, \"y\", \"y\", \"x\", \"y\"),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(a = \"x\", b = NA))\n  expect_equal(\n    as.character(out),\n    c(\"c\", \"c\", \"c\", NA, \"c\", NA, NA, NA, \"c\", \"x\", NA, NA, NA, \"x\", NA),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(a = \"x\", b = \"y\"), default = \"zz\")\n  expect_equal(\n    as.character(out),\n    c(\n      \"zz\",\n      \"zz\",\n      \"zz\",\n      \"y\",\n      \"zz\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"zz\",\n      \"x\",\n      NA,\n      \"y\",\n      \"y\",\n      \"x\",\n      \"y\"\n    ),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(\n    x,\n    list(a = \"x\", b = \"y\"),\n    default = \"zz\",\n    preserve_na = FALSE\n  )\n  expect_equal(\n    as.character(out),\n    c(\n      \"zz\",\n      \"zz\",\n      \"zz\",\n      \"y\",\n      \"zz\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"zz\",\n      \"x\",\n      \"zz\",\n      \"y\",\n      \"y\",\n      \"x\",\n      \"y\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\n# character -----------------------\n\nset.seed(123)\nx <- as.character(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\n\ntest_that(\"recode character\", {\n  out <- recode_values(x, list(a = \"x\", `b, c` = \"y\"))\n  expect_equal(\n    out,\n    c(\n      
\"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"y\",\n      \"x\",\n      \"y\",\n      \"y\",\n      \"x\",\n      \"y\",\n      \"y\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data frame -----------------------\n\nset.seed(123)\nd <- data.frame(\n  x = sample(c(1:4, NA), 15, TRUE),\n  y = as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE)),\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"recode data.frame\", {\n  out <- recode_values(\n    d,\n    recode = list(`1` = 0, `2:3` = 1, `4` = 2, a = \"x\", `b, c` = \"y\")\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L),\n          .Label = c(\"x\", \"y\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- recode_values(\n    d,\n    recode = list(`1` = 0, `2:3` = 1, `4` = 2, a = \"x\", `b, c` = \"y\")\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L),\n          .Label = c(\"x\", \"y\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- recode_values(\n    d,\n    recode = list(`1` = 0, `2:3` = 1, `4` = 2, a = \"x\", `b, c` = \"y\"),\n    select = is.numeric()\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 3L, 1L),\n          .Label = c(\"a\", \"b\", \"c\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 
15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\n# set recode pattern back to default --------------\n\noptions(data_recode_pattern = NULL)\n\nset.seed(123)\nx <- sample(c(1:4, NA), 15, TRUE)\n\ntest_that(\"recode numeric\", {\n  out <- recode_values(x, list(`0` = 1, `1` = 2:3, `2` = 4))\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(\n    x,\n    list(`0` = 1, `1` = 2:3, `2` = 4, `9` = NA),\n    preserve_na = FALSE\n  )\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, 9, 2, 0, 1, 1, 9, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(\n    x,\n    list(`0` = 1, `1` = 2:3, `2` = 4, `9` = NA),\n    preserve_na = TRUE\n  )\n  expect_equal(\n    out,\n    c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"recode, recode-arg is named list\", {\n  expect_warning(expect_identical(\n    recode_values(x, recode = c(`0` = 1, `1` = 2:3, `2` = 4)),\n    x\n  ))\n})\n\n\nset.seed(123)\nx <- as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE))\n\ntest_that(\"recode factor\", {\n  out <- recode_values(x, list(x = \"a\", y = \"b, c\"))\n  expect_equal(\n    out,\n    structure(\n      c(\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L\n      ),\n      .Label = c(\"x\", \"y\"),\n      class = \"factor\"\n    ),\n    ignore_attr = TRUE\n  )\n  out <- recode_values(x, list(x = \"a\", y = c(\"b\", \"c\")))\n  expect_equal(\n    out,\n    structure(\n      c(\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L,\n        1L,\n        2L,\n        2L\n      ),\n      .Label = c(\"x\", \"y\"),\n      class = \"factor\"\n    ),\n    ignore_attr = TRUE\n  
)\n})\n\ntest_that(\"recode, recode-arg is named list\", {\n  expect_warning(expect_identical(\n    recode_values(x, recode = c(x = \"a\", y = \"b, c\")),\n    x\n  ))\n})\n\n\nset.seed(123)\nd <- data.frame(\n  x = sample(c(1:4, NA), 15, TRUE),\n  y = as.factor(sample(c(\"a\", \"b\", \"c\"), 15, TRUE)),\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"recode data.frame\", {\n  out <- recode_values(\n    d,\n    recode = list(`0` = 1, `1` = 2:3, `2` = 4, x = \"a\", y = \"b, c\")\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L),\n          .Label = c(\"x\", \"y\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- recode_values(\n    d,\n    recode = list(`0` = 1, `1` = 2:3, `2` = 4, x = \"a\", y = c(\"b\", \"c\"))\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L),\n          .Label = c(\"x\", \"y\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- recode_values(\n    d,\n    recode = list(`0` = 1, `1` = 2:3, `2` = 4, x = \"a\", y = c(\"b\", \"c\")),\n    select = is.numeric()\n  )\n  expect_equal(\n    out,\n    structure(\n      list(\n        x = c(1, 1, 1, 1, 1, NA, 2, 0, 1, 1, NA, 1, 1, 0, 2),\n        y = structure(\n          c(1L, 1L, 1L, 3L, 2L, 3L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 3L, 1L),\n          .Label = c(\"a\", \"b\", \"c\"),\n          class = \"factor\"\n        )\n      ),\n      row.names = c(NA, 15L),\n      class = \"data.frame\"\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n# select helpers 
------------------------------\ntest_that(\"recode_values regex\", {\n  expect_identical(\n    recode_values(\n      iris,\n      select = \"ies\",\n      regex = TRUE,\n      recode = list(\n        Group1 = \"setosa\",\n        Group2 = \"versicolor\",\n        Group3 = \"virginica\"\n      )\n    ),\n    recode_values(\n      iris,\n      select = \"Species\",\n      recode = list(\n        Group1 = \"setosa\",\n        Group2 = \"versicolor\",\n        Group3 = \"virginica\"\n      )\n    )\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_relocate.R",
    "content": "test_that(\"data_relocate works as expected\", {\n  expect_error(\n    data_relocate(iris, select = \"Species\", before = 2, after = 3),\n    \"You must supply only one of `before` or `after`.\"\n  )\n\n  expect_error(\n    data_relocate(iris, select = \"Species\", before = 10),\n    \"No valid position defined in `before`.\"\n  )\n\n  expect_error(\n    data_relocate(iris, select = \"Species\", after = 10),\n    \"No valid position defined in `after`.\"\n  )\n\n  expect_named(\n    data_relocate(iris, select = \"Species\", before = \"Sepal.Length\"),\n    c(\"Species\", \"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n  expect_named(\n    data_relocate(iris, select = \"Species\", before = \"Sepal.Width\"),\n    c(\"Sepal.Length\", \"Species\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_named(\n    data_relocate(iris, select = \"Sepal.Width\", after = \"Species\"),\n    names(data_relocate(iris, select = \"Sepal.Width\", after = -1))\n  )\n\n  expect_named(\n    data_relocate(\n      iris,\n      select = c(\"Species\", \"Petal.Length\"),\n      after = \"Sepal.Width\"\n    ),\n    names(data_relocate(iris, select = c(\"Species\", \"Petal.Length\"), after = 2))\n  )\n})\n\n\ntest_that(\"data_relocate select-helpers\", {\n  expect_identical(\n    colnames(data_relocate(iris, select = starts_with(\"Sepal\"), after = 5)),\n    colnames(iris[c(3:5, 1:2)])\n  )\n  expect_identical(\n    colnames(data_relocate(iris, select = 1:2, after = 5)),\n    colnames(iris[c(3:5, 1:2)])\n  )\n  expect_identical(\n    colnames(data_relocate(iris, select = -1)),\n    colnames(iris[c(2:5, 1)])\n  )\n  expect_identical(\n    colnames(data_relocate(iris, select = Species, after = 1)),\n    colnames(iris[c(1, 5, 2:4)])\n  )\n  expect_identical(\n    colnames(data_relocate(iris, select = ~ Sepal.Width + Species)),\n    colnames(iris[c(2, 5, 1, 3:4)])\n  )\n  expect_identical(\n    colnames(data_relocate(iris, select = 
starts_with(\"sepal\"), after = 5)),\n    colnames(iris)\n  )\n  expect_identical(\n    colnames(data_relocate(\n      iris,\n      select = starts_with(\"sepal\"),\n      after = 5,\n      ignore_case = TRUE\n    )),\n    colnames(iris[c(3:5, 1:2)])\n  )\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_relocate preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- data_relocate(out, 4:6)\n  a2 <- attributes(out2)\n\n  # attributes may not be in the same order\n  expect_true(all(names(a1) %in% names(a2)))\n  expect_identical(length(a1), length(a2))\n})\n\n\n# select helpers ------------------------------\ntest_that(\"data_relocate regex\", {\n  expect_identical(\n    names(data_relocate(mtcars, select = \"pg\", regex = TRUE, after = \"carb\"))[\n      11\n    ],\n    \"mpg\"\n  )\n})\n\n\n# fuzzy matching ------------------------------\nout <- data.frame(\n  Parameter = \"Test\",\n  Median = 0.5,\n  CI_low = 0.4,\n  CI_high = 0.6,\n  pd = 0.97,\n  Rhat = 0.99,\n  ESS = 1000,\n  log_BF = 3,\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"data_relocate misspelled\", {\n  # close match\n  expect_error(\n    data_relocate(out, \"pd\", before = \"BF\"),\n    \"log_BF\"\n  )\n  # close multiple matches\n  expect_error(\n    data_relocate(out, \"pd\", before = \"CIl\"),\n    \"CI_low\"\n  )\n  # not even close\n  expect_error(\n    data_relocate(out, \"pd\", before = \"xyz\"),\n    \"misspelled\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_remove.R",
    "content": "test_that(\"data_remove works as expected\", {\n  expect_identical(\n    data_remove(BOD, \"Time\"),\n    structure(\n      list(demand = c(8.3, 10.3, 19, 16, 15.6, 19.8)),\n      class = \"data.frame\",\n      row.names = c(NA, 6L),\n      reference = \"A1.4, p. 270\"\n    )\n  )\n})\n\n\ntest_that(\"data_remove works with NSE\", {\n  expect_named(\n    data_remove(iris, starts_with(\"Sepal\")),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, \"Sepal\"),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, regex(\"\\\\.\")),\n    \"Species\"\n  )\n\n  expect_named(\n    data_remove(iris, Sepal.Width:Petal.Width),\n    c(\"Sepal.Length\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, contains(\"Sep\")),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, contains(\"sep\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, contains(\"sep\"), ignore_case = TRUE),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, 1:3),\n    c(\"Petal.Width\", \"Species\")\n  )\n\n  expect_identical(\n    colnames(data_remove(iris, c(1, 5))),\n    colnames(iris)[2:4]\n  )\n\n  expect_identical(\n    colnames(data_remove(iris, -1:-2)),\n    colnames(iris)[1:2]\n  )\n\n  expect_identical(\n    colnames(data_remove(iris, c(1, 4:5))),\n    colnames(iris)[2:3]\n  )\n\n  expect_identical(\n    colnames(data_remove(iris, \"abc\")),\n    colnames(iris)\n  )\n\n  expect_named(\n    data_remove(iris, \"Species\"),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  
expect_named(\n    data_remove(iris, \"species\"),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_named(\n    data_remove(iris, \"species\", ignore_case = TRUE),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n})\n\n\ntest_that(\"data_remove from other functions\", {\n  test_fun <- function(data, i) {\n    data_remove(data, select = i)\n  }\n  expect_named(\n    test_fun(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_remove preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- data_remove(out, \"SE\")\n  a2 <- attributes(out2)\n\n  # attributes may not be in the same order\n  expect_true(all(names(a1) %in% names(a2)))\n  expect_identical(length(a1), length(a2))\n})\n\n# select helpers ------------------------------\ntest_that(\"data_remove regex\", {\n  expect_identical(\n    names(data_remove(mtcars, select = \"pg\", regex = TRUE)),\n    names(mtcars[-(1)])\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_rename.R",
    "content": "test <- head(iris)\n\n# basic tests --------------\n\ntest_that(\"data_rename works with one or several replacements\", {\n  expect_named(\n    data_rename(test, \"Sepal.Length\", \"length\"),\n    c(\"length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_named(\n    data_rename(\n      test,\n      c(\"Sepal.Length\", \"Sepal.Width\"),\n      c(\"length\", \"width\")\n    ),\n    c(\"length\", \"width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_named(\n    data_rename(test, c(length = \"Sepal.Length\", width = \"Sepal.Width\")),\n    c(\"length\", \"width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n})\n\ntest_that(\"data_rename cannot have a partially named vector\", {\n  expect_error(\n    data_rename(test, c(length = \"Sepal.Length\", \"Sepal.Width\")),\n    \"all elements must\"\n  )\n})\n\ntest_that(\"data_rename returns a data frame\", {\n  x <- data_rename(test, \"Sepal.Length\", \"length\")\n  expect_s3_class(x, \"data.frame\")\n})\n\ntest_that(\"data_rename: multiple selection types\", {\n  expect_named(\n    data_rename(test, select = 1, \"foo\"),\n    c(\"foo\", names(iris)[2:5])\n  )\n  expect_named(\n    data_rename(test, select = regex(\"tal\"), c(\"foo1\", \"foo2\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"foo1\", \"foo2\", \"Species\")\n  )\n})\n\ntest_that(\"data_rename: replacement not allowed to have NA or empty strings\", {\n  expect_error(\n    data_rename(\n      test,\n      select = c(\"Species\", \"Sepal.Length\"),\n      replacement = c(\"foo\", NA_character_)\n    ),\n    regexp = \"`replacement` is not allowed\"\n  )\n})\n\n# replacement -------------\n\ntest_that(\"data_rename errors when no replacement\", {\n  expect_error(\n    data_rename(test, select = c(\"Sepal.Length\", \"Petal.Length\")),\n    \"There are more names in `select` than in `replacement`\"\n  )\n})\n\ntest_that(\"data_rename errors when too many names in 'replacement'\", {\n  
expect_error(\n    data_rename(test, replacement = paste0(\"foo\", 1:6)),\n    \"There are more names in `replacement` than in `select`\"\n  )\n})\n\ntest_that(\"data_rename works when not enough names in 'replacement'\", {\n  expect_error(\n    data_rename(test, replacement = paste0(\"foo\", 1:2)),\n    \"There are more names in `select` than in `replacement`\"\n  )\n})\n\n\n# no select --------------\n\ntest_that(\"data_rename errors when select = NULL\", {\n  expect_error(\n    data_rename(test),\n    \"more names in `select`\"\n  )\n})\n\n\n# other --------------\n\ntest_that(\"data_rename deals correctly with duplicated replacement\", {\n  x <- data_rename(\n    test,\n    select = names(test)[1:4],\n    replacement = c(\"foo\", \"bar\", \"foo\", \"bar\")\n  )\n  expect_identical(dim(test), dim(x))\n  expect_named(x[1:4], c(\"foo\", \"bar\", \"foo.2\", \"bar.2\"))\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_rename preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- data_rename(out, \"p\", \"p-val\")\n  a2 <- attributes(out2)\n\n  expect_named(a1, names(a2))\n})\n\n\n# glue-styled select --------------------------\n\ntest_that(\"data_rename glue-style\", {\n  data(mtcars)\n  out <- data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"formerly_{col}\")\n  expect_named(out, c(\"formerly_mpg\", \"formerly_cyl\", \"formerly_disp\"))\n  out <- data_rename(\n    mtcars[1:3],\n    c(\"mpg\", \"cyl\", \"disp\"),\n    \"{col}_is_column_{n}\"\n  )\n  expect_named(out, c(\"mpg_is_column_1\", \"cyl_is_column_2\", \"disp_is_column_3\"))\n  out <- data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"new_{letter}\")\n  expect_named(out, c(\"new_a\", \"new_b\", \"new_c\"))\n})\n\ntest_that(\"data_rename enough letters\", {\n  data(efc, package = \"datawizard\")\n  data(mtcars)\n  data(iris)\n  
data(ChickWeight)\n  data(ToothGrowth)\n  data(USArrests)\n  data(airquality)\n  x <- cbind(\n    mtcars[1:5, ],\n    iris[1:5, ],\n    efc[1:5, ],\n    ChickWeight[1:5, ],\n    ToothGrowth[1:5, ],\n    USArrests[1:5, ],\n    airquality[1:5, ]\n  )\n  expect_named(\n    data_rename(x, replacement = \"long_letter_{letter}\"),\n    c(\n      \"long_letter_a1\",\n      \"long_letter_b1\",\n      \"long_letter_c1\",\n      \"long_letter_d1\",\n      \"long_letter_e1\",\n      \"long_letter_f1\",\n      \"long_letter_g1\",\n      \"long_letter_h1\",\n      \"long_letter_i1\",\n      \"long_letter_j1\",\n      \"long_letter_k1\",\n      \"long_letter_l1\",\n      \"long_letter_m1\",\n      \"long_letter_n1\",\n      \"long_letter_o1\",\n      \"long_letter_p1\",\n      \"long_letter_q1\",\n      \"long_letter_r1\",\n      \"long_letter_s1\",\n      \"long_letter_t1\",\n      \"long_letter_u1\",\n      \"long_letter_v1\",\n      \"long_letter_w1\",\n      \"long_letter_x1\",\n      \"long_letter_y1\",\n      \"long_letter_z1\",\n      \"long_letter_a2\",\n      \"long_letter_b2\",\n      \"long_letter_c2\",\n      \"long_letter_d2\",\n      \"long_letter_e2\",\n      \"long_letter_f2\",\n      \"long_letter_g2\",\n      \"long_letter_h2\",\n      \"long_letter_i2\",\n      \"long_letter_j2\",\n      \"long_letter_k2\",\n      \"long_letter_l2\"\n    )\n  )\n})\n\nskip_if_not_installed(\"withr\")\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_rename glue-style, environment\", {\n    data(mtcars)\n    x <- c(\"hi\", \"there\", \"!\")\n    out <- data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"col_{x}\")\n    expect_named(out, c(\"col_hi\", \"col_there\", \"col_!\"))\n    expect_error(\n      data_rename(mtcars[1:3], c(\"mpg\", \"disp\"), \"col_{x}\"),\n      regex = \"The number of values\"\n    )\n  })\n)\n\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_rename glue-style, object not in environment\", {\n    data(mtcars)\n    
expect_error(\n      data_rename(mtcars[1:3], c(\"mpg\", \"cyl\", \"disp\"), \"col_{x}\"),\n      regex = \"The object\"\n    )\n  })\n)\n\nwithr::with_environment(\n  new.env(),\n  test_that(\"data_rename glue-style, function in environment\", {\n    data(mtcars)\n    my_fun <- function(cols_to_rename) {\n      data_rename(head(mtcars)[, 1:6], cols_to_rename, \"new_{col}\")\n    }\n    expect_named(\n      my_fun(c(\"mpg\", \"drat\")),\n      c(\"new_mpg\", \"cyl\", \"disp\", \"hp\", \"new_drat\", \"wt\")\n    )\n    expect_named(\n      my_fun(\"mpg\"),\n      c(\"new_mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\")\n    )\n  })\n)\n\ntest_that(\"works with lists\", {\n  result <- list(x = 1, y = 2)\n  expect_error(\n    data_rename(result, select = names(result), replacement = c(\"a\", \"b\")),\n    regex = \"must be a data frame\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_reorder.R",
    "content": "test_that(\"data_reorder works as expected\", {\n  expect_named(\n    data_reorder(iris, c(\"Species\", \"Sepal.Length\")),\n    c(\"Species\", \"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_warning(expect_named(\n    data_reorder(iris, c(\"Species\", \"dupa\")),\n    c(\"Species\", \"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  ))\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_reorder preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- data_reorder(out, 4:6)\n  a2 <- attributes(out2)\n\n  # attributes may not be in the same order\n  expect_true(all(names(a1) %in% names(a2)))\n  expect_length(a1, length(a2))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_replicate.R",
    "content": "test_that(\"data_replicate: simple use case\", {\n  data(mtcars)\n  d <- head(mtcars)\n  out <- data_replicate(d, \"carb\")\n  expect_identical(dim(out), c(13L, 10L))\n  expect_identical(\n    out$disp,\n    c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225)\n  )\n  expect_named(\n    out,\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\")\n  )\n\n  out <- data_replicate(d, 11)\n  expect_identical(dim(out), c(13L, 10L))\n  expect_identical(\n    out$disp,\n    c(160, 160, 160, 160, 160, 160, 160, 160, 108, 258, 360, 360, 225)\n  )\n  expect_named(\n    out,\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\")\n  )\n\n  d$mpg[5] <- NA\n  out <- data_replicate(d, \"carb\")\n  expect_identical(dim(out), c(13L, 10L))\n  expect_identical(\n    out$mpg,\n    c(21, 21, 21, 21, 21, 21, 21, 21, 22.8, 21.4, NA, NA, 18.1)\n  )\n  expect_named(\n    out,\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\")\n  )\n\n  d$carb[3] <- NA\n  out <- data_replicate(d, \"carb\", remove_na = TRUE)\n  expect_identical(dim(out), c(12L, 10L))\n  expect_identical(\n    out$mpg,\n    c(21, 21, 21, 21, 21, 21, 21, 21, 21.4, NA, NA, 18.1)\n  )\n  expect_named(\n    out,\n    c(\"mpg\", \"cyl\", \"disp\", \"hp\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\")\n  )\n\n  out <- data_replicate(d, \"carb\", select = c(\"disp\", \"hp\"), remove_na = TRUE)\n  expect_identical(dim(out), c(12L, 2L))\n  expect_identical(\n    out$disp,\n    c(160, 160, 160, 160, 160, 160, 160, 160, 258, 360, 360, 225)\n  )\n  expect_named(out, c(\"disp\", \"hp\"))\n\n  d <- data.frame(\n    a = c(\"a\", \"b\", \"c\"),\n    b = 1:3,\n    rep = c(3, 2, 4),\n    stringsAsFactors = FALSE\n  )\n  out <- data_replicate(d, \"rep\")\n  expect_identical(out$a, c(\"a\", \"a\", \"a\", \"b\", \"b\", \"c\", \"c\", \"c\", 
\"c\"))\n})\n\n\ntest_that(\"data_replicate: errors\", {\n  data(mtcars)\n  d <- head(mtcars)\n  expect_error(data_replicate(d), regex = \"No column\")\n  expect_error(\n    data_replicate(d, expand = c(\"mpg\", \"gear\")),\n    regex = \"a single string\"\n  )\n  expect_error(\n    data_replicate(d, expand = \"geas\"),\n    regex = \"The column provided\"\n  )\n  expect_error(\n    data_replicate(d, expand = \"qsec\"),\n    regex = \"The column provided\"\n  )\n  d$carb[3] <- NA\n  expect_error(data_replicate(d, \"carb\"), regex = \"missing values\")\n  d <- head(mtcars)\n  d$carb[3] <- Inf\n  expect_error(data_replicate(d, \"carb\"), regex = \"infinite values\")\n})\n\n\ntest_that(\"data_replicate: don't simplify if only one column left\", {\n  a <- c(1, 2, 3, 4)\n  b <- c(4, 3, 2, 1)\n  nrtimes <- c(1, 2, 0, 1)\n\n  d <- data.frame(a, b, nrtimes)\n  out <- data_replicate(d, expand = \"nrtimes\")\n  expect_identical(dim(out), c(4L, 2L))\n\n  d <- data.frame(a, nrtimes)\n  out <- data_replicate(d, expand = \"nrtimes\")\n  expect_identical(dim(out), c(4L, 1L))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_rescale.R",
    "content": "test_that(\"rescale works as expected\", {\n  expect_equal(\n    rescale(c(0, 1, 5, -5, -2), to = NULL),\n    c(0, 1, 5, -5, -2),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    rescale(rep(NA_real_, 3)),\n    rep(NA_real_, 3),\n    ignore_attr = TRUE\n  )\n\n  expect_message(rescale(iris$Species))\n\n  expect_equal(\n    rescale(c(0, 1, 5, -5, -2)),\n    c(50, 60, 100, 0, 30),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    rescale(c(0, 1, 5, -5, -2), to = c(-5, 5)),\n    c(0, 1, 5, -5, -2),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    rescale(c(1, 3, 4), to = c(0, 40), range = c(0, 4)),\n    c(10, 30, 40),\n    ignore_attr = TRUE\n  )\n\n  expect_snapshot(head(rescale(iris, to = c(0, 1))))\n\n  expect_snapshot(head(rescale(iris, to = c(0, 1), select = \"Sepal.Length\")))\n\n  expect_snapshot(\n    head(rescale(\n      iris,\n      to = list(\n        Sepal.Length = c(0, 1),\n        Petal.Length = c(-1, 0)\n      )\n    ))\n  )\n})\n\n\ntest_that(\"rescale works with select helpers\", {\n  out <- rescale(iris, to = c(0, 1), select = c(\"Sepal.Width\", \"Sepal.Length\"))\n  expect_equal(\n    head(out$Sepal.Width),\n    c(0.625, 0.41667, 0.5, 0.45833, 0.66667, 0.79167),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Petal.Length),\n    head(iris$Petal.Length),\n    tolerance = 1e-3\n  )\n\n  # check class attributes\n  expect_identical(\n    vapply(out, class, character(1)),\n    c(\n      Sepal.Length = \"numeric\",\n      Sepal.Width = \"numeric\",\n      Petal.Length = \"numeric\",\n      Petal.Width = \"numeric\",\n      Species = \"factor\"\n    )\n  )\n\n  out <- rescale(iris, to = c(0, 1), select = starts_with(\"Sepal\"))\n  expect_equal(\n    head(out$Sepal.Width),\n    c(0.625, 0.41667, 0.5, 0.45833, 0.66667, 0.79167),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Petal.Length),\n    head(iris$Petal.Length),\n    tolerance = 1e-3\n  )\n\n  skip_if_not_installed(\"poorman\")\n\n  x <- 
poorman::group_by(iris, Species)\n  out <- rescale(x, to = c(0, 1), select = starts_with(\"Sepal\"))\n  expect_equal(\n    head(out$Sepal.Width),\n    c(0.57143, 0.33333, 0.42857, 0.38095, 0.61905, 0.7619),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Petal.Length),\n    head(iris$Petal.Length),\n    tolerance = 1e-3\n  )\n})\n\n\n# grouped df ------------------------------\ntest_that(\"rescale works grouped df and append\", {\n  out <- rescale(\n    iris,\n    to = c(0, 1),\n    select = c(\"Sepal.Width\", \"Sepal.Length\"),\n    append = TRUE\n  )\n  expect_equal(\n    head(out$Sepal.Width_r),\n    c(0.625, 0.41667, 0.5, 0.45833, 0.66667, 0.79167),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Petal.Length),\n    head(iris$Petal.Length),\n    tolerance = 1e-3\n  )\n  expect_identical(\n    colnames(out),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Width_r\",\n      \"Sepal.Length_r\"\n    )\n  )\n\n  skip_if_not_installed(\"poorman\")\n\n  x <- poorman::group_by(iris, Species)\n  out <- rescale(x, to = c(0, 1), select = starts_with(\"Sepal\"), append = TRUE)\n  expect_equal(\n    head(out$Sepal.Width_r),\n    c(0.57143, 0.33333, 0.42857, 0.38095, 0.61905, 0.7619),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$Petal.Length),\n    head(iris$Petal.Length),\n    tolerance = 1e-3\n  )\n  expect_identical(\n    colnames(out),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_r\",\n      \"Sepal.Width_r\"\n    )\n  )\n})\n\n\n# select helpers ------------------------------\ntest_that(\"data_rescale regex\", {\n  expect_equal(\n    rescale(mtcars, select = \"pg\", regex = TRUE)$mpg,\n    rescale(mtcars, select = \"mpg\")$mpg,\n    ignore_attr = TRUE\n  )\n})\n\n\n# expanding range ------------------------------\ntest_that(\"data_rescale can 
expand range\", {\n  # for vectors\n  x <- 5:15\n  expect_equal(\n    rescale(x, multiply = 1.1),\n    c(4.5, 5.6, 6.7, 7.8, 8.9, 10, 11.1, 12.2, 13.3, 14.4, 15.5),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    rescale(x, multiply = 1.1),\n    rescale(x, add = 0.5),\n    ignore_attr = TRUE\n  )\n  expect_error(rescale(x, multiply = 0.9, add = 1), regex = \"Only one of\")\n  expect_error(rescale(x, multiply = c(1.2, 1.4)), regex = \"The length of\")\n\n  # different values for add\n  expect_equal(\n    rescale(x, add = c(1, 3)),\n    c(4, 5.4, 6.8, 8.2, 9.6, 11, 12.4, 13.8, 15.2, 16.6, 18),\n    ignore_attr = TRUE\n  )\n  expect_error(rescale(x, add = 1:3), regex = \"The length of\")\n\n  # works with NA\n  expect_equal(\n    rescale(rep(NA_real_, 3), multiply = 1.1),\n    rep(NA_real_, 3),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    rescale(rep(NA_real_, 3), add = 2),\n    rep(NA_real_, 3),\n    ignore_attr = TRUE\n  )\n\n  # for data frames\n  d <- data.frame(x = 5:15, y = 5:15)\n  expect_equal(\n    rescale(d, multiply = 1.1),\n    rescale(d, add = 0.5),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    rescale(d, multiply = list(x = 1.1, y = 0.5)),\n    rescale(d, add = list(x = 0.5, y = -2.5)),\n    ignore_attr = TRUE\n  )\n  # data frames accept multiple add-values per column\n  out <- rescale(d, add = list(x = c(1, 3), y = c(2, 4)))\n  expect_equal(\n    out$x,\n    rescale(d$x, add = c(1, 3)),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$y,\n    rescale(d$y, add = c(2, 4)),\n    ignore_attr = TRUE\n  )\n\n  expect_error(rescale(d, multiply = 0.9, add = 1), regex = \"Only one of\")\n  expect_error(\n    rescale(d, multiply = list(x = 0.9, y = 2), add = list(y = 1)),\n    regex = \"Only one of\"\n  )\n  expect_error(rescale(d, multiply = c(0.9, 1.5)), regex = \"The length of\")\n})\n"
  },
  {
    "path": "tests/testthat/test-data_restoretype.R",
    "content": "test_that(\"data_restoretype works with reference\", {\n  data <- data.frame(\n    Sepal.Length = c(\"1\", \"3\", \"2\"),\n    Species = c(\"setosa\", \"versicolor\", \"setosa\"),\n    New = c(\"1\", \"3\", \"4\"),\n    stringsAsFactors = FALSE\n  )\n\n  fixed <- data_restoretype(data, reference = iris)\n\n  expect_equal(typeof(fixed$Species), typeof(iris$Species))\n  expect_equal(typeof(fixed$Sepal.Length), typeof(iris$Sepal.Length))\n  expect_equal(typeof(fixed$New), \"character\")\n})\n\n\ntest_that(\"data_restoretype works without reference\", {\n  data <- data.frame(\n    Sepal.Length = c(\"1\", \"3\", \"2\"),\n    Species = c(\"setosa\", \"versicolor\", \"setosa\"),\n    New = c(\"1\", \"3\", \"4\"),\n    stringsAsFactors = FALSE\n  )\n\n  expect_equal(\n    data_restoretype(data, reference = NULL),\n    data.frame(\n      Sepal.Length = c(1, 3, 2),\n      Species = c(\"setosa\", \"versicolor\", \"setosa\"),\n      New = c(1, 3, 4),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_reverse.R",
    "content": "# explanation of how reverse works:\n# https://github.com/easystats/datawizard/issues/106#issuecomment-1066628399\n\ntest_that(\"reverse works with numeric\", {\n  expect_identical(\n    reverse(1:5),\n    as.double(5:1)\n  )\n  expect_identical(\n    reverse(-2:2),\n    as.double(2:-2)\n  )\n})\n\ntest_that(\"reverse works with factor\", {\n  expect_identical(\n    reverse(factor(1:5)),\n    factor(5:1)\n  )\n  expect_identical(\n    reverse(factor(-2:2)),\n    factor(2:-2)\n  )\n})\n\ntest_that(\"reverse works with data frame\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse(test, select = \"x\"),\n    data.frame(\n      x = as.double(5:1),\n      y = c(3, 8, 2, 5, 1)\n    )\n  )\n  expect_identical(\n    reverse(test, exclude = \"x\"),\n    data.frame(\n      x = 1:5,\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n  expect_identical(\n    reverse(test),\n    data.frame(\n      x = as.double(5:1),\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n})\n\ntest_that(\"reverse works with data frame and append\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse(test, select = \"x\", append = TRUE),\n    data.frame(\n      x = 1:5,\n      y = c(3, 8, 2, 5, 1),\n      x_r = as.double(5:1)\n    )\n  )\n  expect_identical(\n    reverse(test, append = TRUE),\n    data.frame(\n      x = 1:5,\n      y = c(3, 8, 2, 5, 1),\n      x_r = as.double(5:1),\n      y_r = c(6, 1, 7, 4, 8)\n    )\n  )\n})\n\ntest_that(\"reverse: arg 'select' works with formula\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse(test, select = ~x),\n    data.frame(\n      x = as.double(5:1),\n      y = c(3, 8, 2, 5, 1)\n    )\n  )\n  expect_identical(\n    reverse(test, select = ~ x + y),\n    data.frame(\n      x = as.double(5:1),\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n})\n\ntest_that(\"reverse: arg 'exclude' works with formula\", {\n  
test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse(test, exclude = ~x),\n    data.frame(\n      x = 1:5,\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n  expect_identical(\n    reverse(test, exclude = ~ x + y),\n    test\n  )\n})\n\ntest_that(\"reverse: argument 'range' works\", {\n  expect_identical(\n    reverse(c(1, 3, 4), range = c(0, 4)),\n    c(3, 1, 0)\n  )\n  expect_identical(\n    reverse(factor(c(1, 2, 3, 4, 5)), range = 0:10),\n    factor(9:5, levels = 0:10)\n  )\n\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse(test, select = \"x\", range = c(0, 8)),\n    data.frame(\n      x = as.double(7:3),\n      y = c(3, 8, 2, 5, 1)\n    )\n  )\n  expect_identical(\n    reverse(test, range = c(0, 8)),\n    data.frame(\n      x = as.double(7:3),\n      y = c(5, 0, 6, 3, 7)\n    )\n  )\n})\n\ntest_that(\"reverse ignores NA\", {\n  expect_identical(\n    reverse(c(1, 2, 8, NA)),\n    c(8, 7, 1, NA)\n  )\n})\n\ntest_that(\"reverse returns NA if only NA provided\", {\n  expect_identical(\n    reverse(c(NA_real_, NA_real_)),\n    c(NA_real_, NA_real_)\n  )\n  expect_identical(\n    reverse(factor(c(NA, NA))),\n    factor(c(NA, NA))\n  )\n})\n\ntest_that(\"reverse warns if single value to reverse\", {\n  expect_warning(\n    reverse(1),\n    regexp = \"A `range` must be provided for data with only one unique value.\"\n  )\n  expect_warning(\n    reverse(factor(1)),\n    regexp = \"A `range` must be provided for data with only one unique value.\"\n  )\n})\n\ntest_that(\"reverse msg for unsupported\", {\n  expect_message(reverse(as.Date(c(\"2022-04-24\", \"2022-04-23\"))))\n})\n\n\n# Same tests with reverse_scale (alias) --------------------------\n\ntest_that(\"reverse_scale works with numeric\", {\n  expect_identical(\n    reverse_scale(1:5),\n    as.double(5:1)\n  )\n  expect_identical(\n    reverse_scale(-2:2),\n    as.double(2:-2)\n  )\n})\n\ntest_that(\"reverse_scale works 
with factor\", {\n  expect_identical(\n    reverse_scale(factor(1:5)),\n    factor(5:1)\n  )\n  expect_identical(\n    reverse_scale(factor(-2:2)),\n    factor(2:-2)\n  )\n})\n\ntest_that(\"reverse_scale works with data frame\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse_scale(test, select = \"x\"),\n    data.frame(\n      x = as.double(5:1),\n      y = c(3, 8, 2, 5, 1)\n    )\n  )\n  expect_identical(\n    reverse_scale(test, exclude = \"x\"),\n    data.frame(\n      x = 1:5,\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n  expect_identical(\n    reverse_scale(test),\n    data.frame(\n      x = as.double(5:1),\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n})\n\ntest_that(\"reverse_scale: arg 'select' works with formula\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse_scale(test, select = ~x),\n    data.frame(\n      x = as.double(5:1),\n      y = c(3, 8, 2, 5, 1)\n    )\n  )\n  expect_identical(\n    reverse_scale(test, select = ~ x + y),\n    data.frame(\n      x = as.double(5:1),\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n})\n\ntest_that(\"reverse_scale: arg 'exclude' works with formula\", {\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse_scale(test, exclude = ~x),\n    data.frame(\n      x = 1:5,\n      y = c(6, 1, 7, 4, 8)\n    )\n  )\n  expect_identical(\n    reverse_scale(test, exclude = ~ x + y),\n    test\n  )\n})\n\ntest_that(\"reverse_scale: argument 'range' works\", {\n  expect_identical(\n    reverse_scale(c(1, 3, 4), range = c(0, 4)),\n    c(3, 1, 0)\n  )\n  expect_identical(\n    reverse_scale(factor(c(1, 2, 3, 4, 5)), range = 0:10),\n    factor(9:5, levels = 0:10)\n  )\n\n  test <- data.frame(\n    x = 1:5,\n    y = c(3, 8, 2, 5, 1)\n  )\n  expect_identical(\n    reverse_scale(test, select = \"x\", range = c(0, 8)),\n    data.frame(\n      x = as.double(7:3),\n      y = c(3, 8, 2, 5, 1)\n    
)\n  )\n  expect_identical(\n    reverse_scale(test, range = c(0, 8)),\n    data.frame(\n      x = as.double(7:3),\n      y = c(5, 0, 6, 3, 7)\n    )\n  )\n})\n\ntest_that(\"reverse_scale ignores NA\", {\n  expect_identical(\n    reverse_scale(c(1, 2, 8, NA)),\n    c(8, 7, 1, NA)\n  )\n})\n\ntest_that(\"reverse_scale returns NA if only NA provided\", {\n  expect_identical(\n    reverse_scale(c(NA_real_, NA_real_)),\n    c(NA_real_, NA_real_)\n  )\n  expect_identical(\n    reverse_scale(factor(c(NA, NA))),\n    factor(c(NA, NA))\n  )\n})\n\ntest_that(\"reverse_scale warns if single value to reverse\", {\n  expect_warning(\n    reverse_scale(1),\n    regexp = \"A `range` must be provided for data with only one unique value.\"\n  )\n  expect_warning(\n    reverse_scale(factor(1)),\n    regexp = \"A `range` must be provided for data with only one unique value.\"\n  )\n})\n\n\ntest_that(\"reverse_scale select helpers\", {\n  data(iris)\n  out <- rescale(\n    iris,\n    to = list(\n      Sepal.Length = c(0, 1),\n      Petal.Length = c(-1, 0)\n    ),\n    select = ends_with(\"length\")\n  )\n\n  expect_identical(out$Sepal.Length, iris$Sepal.Length, tolerance = 1e-3)\n\n  out <- rescale(\n    iris,\n    to = list(\n      Sepal.Length = c(0, 1),\n      Petal.Length = c(-1, 0)\n    ),\n    select = ends_with(\"length\"),\n    ignore_case = TRUE\n  )\n\n  expect_identical(\n    head(out$Sepal.Length),\n    c(0.22222, 0.16667, 0.11111, 0.08333, 0.19444, 0.30556),\n    tolerance = 1e-3\n  )\n})\n\n\n# with grouped data -------------------------------------------\n\nset.seed(123)\nvalue1 <- sample(1:10, 6, replace = TRUE)\nset.seed(456)\nvalue2 <- sample(1:10, 6, replace = TRUE)\n\ntest_df <- data.frame(\n  id = rep(c(\"A\", \"B\"), each = 3),\n  value1 = value1,\n  value2 = value2,\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"reverse works with data frames (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  expect_identical(\n    test_df %>%\n      
poorman::group_by(id) %>%\n      reverse(exclude = \"id\") %>%\n      poorman::ungroup(),\n    data.frame(\n      id = rep(c(\"A\", \"B\"), each = 3),\n      value1 = c(10, 10, 3, 6, 2, 3),\n      value2 = c(4, 6, 3, 10, 6, 5),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\n\ntest_that(\"reverse works with grouped data frames and append\", {\n  skip_if_not_installed(\"poorman\")\n\n  test <- data.frame(\n    x = 1:6,\n    y = c(3, 8, 2, 5, 1, 4),\n    grp = rep(c(\"a\", \"b\"), 3),\n    stringsAsFactors = FALSE\n  )\n  expect_identical(\n    test %>%\n      poorman::group_by(grp) %>%\n      reverse(append = TRUE) %>%\n      poorman::ungroup(),\n    data.frame(\n      x = 1:6,\n      y = c(3, 8, 2, 5, 1, 4),\n      grp = rep(c(\"a\", \"b\"), 3),\n      x_r = as.double(c(5, 6, 3, 4, 1, 2)),\n      y_r = as.double(c(1, 4, 2, 7, 3, 8)),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\n\nset.seed(789)\nvalue1 <- sample(c(1:10, NA), 6, replace = TRUE)\nset.seed(10)\nvalue2 <- sample(c(1:10, NA), 6, replace = TRUE)\n\ntest_df <- data.frame(\n  id = rep(c(\"A\", \"B\"), each = 3),\n  value1 = value1,\n  value2 = value2,\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"reverse works with data frames containing NAs (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  expect_identical(\n    test_df %>%\n      poorman::group_by(id) %>%\n      reverse(exclude = \"id\") %>%\n      poorman::ungroup(),\n    data.frame(\n      id = rep(c(\"A\", \"B\"), each = 3),\n      value1 = c(10, 4, 4, 5, 3, 4),\n      value2 = c(NA, 10, 9, 7, 6, 8),\n      stringsAsFactors = FALSE\n    )\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"reverse regex\", {\n  expect_identical(\n    reverse(mtcars, select = \"arb\", regex = TRUE),\n    reverse(mtcars, select = \"carb\")\n  )\n})\n\n\n# work or give informative errors / warnings (#380) ------------------\ntest_that(\"reverse, larger range\", {\n  # works\n  expect_identical(\n    reverse(c(1, 3, 4), range = 
c(0, 4)),\n    c(3, 1, 0)\n  )\n  expect_identical(\n    reverse(factor(c(1, 3, 4)), range = 0:4),\n    structure(\n      c(4L, 2L, 1L),\n      levels = c(\"0\", \"1\", \"2\", \"3\", \"4\"),\n      class = \"factor\"\n    )\n  )\n  expect_identical(\n    reverse(factor(c(1, 3, 4)), range = c(0, 4)),\n    structure(\n      c(4L, 2L, 1L),\n      levels = c(\"0\", \"1\", \"2\", \"3\", \"4\"),\n      class = \"factor\"\n    )\n  )\n\n  # errors on invalid input\n  expect_error(reverse(c(1, 3, 4), range = 0:4))\n  expect_error(reverse(factor(c(1, 3, 4, 5)), range = c(0, 2, 4)))\n  # errors on invalid input (NA in range)\n  expect_error(reverse(c(1, 3, 4), range = c(1, NA)), regex = \"missing\")\n  expect_error(\n    reverse(factor(letters[1:3]), range = c(1, NA)),\n    regex = \"missing\"\n  )\n\n  # warns\n  expect_warning(\n    reverse(factor(c(\"a\", \"b\", \"c\")), range = c(1, 3, 5, 7)),\n    regex = \"No current\"\n  )\n  expect_warning(\n    reverse(factor(c(9, 10, 11)), range = c(1, 3, 5, 7)),\n    regex = \"No current\"\n  )\n  expect_warning(\n    reverse(factor(c(1, 3, 11)), range = c(1, 3, 5, 7)),\n    regex = \"Not all\"\n  )\n\n  # silent\n  expect_silent(reverse(\n    factor(c(\"a\", \"b\", \"c\")),\n    range = c(1, 3, 5, 7),\n    verbose = FALSE\n  ))\n  expect_silent(reverse(\n    factor(c(9, 10, 11)),\n    range = c(1, 3, 5, 7),\n    verbose = FALSE\n  ))\n  expect_silent(reverse(\n    factor(c(1, 3, 11)),\n    range = c(1, 3, 5, 7),\n    verbose = FALSE\n  ))\n\n  # works as intended\n  expect_identical(\n    reverse(factor(c(1, 3, 11)), range = c(1, 3, 5, 7), verbose = FALSE),\n    structure(c(4L, 3L, NA), levels = c(\"1\", \"3\", \"5\", \"7\"), class = \"factor\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_rotate.R",
    "content": "test_that(\"rotate data works as expected\", {\n  df <- mtcars[1:3, 1:4]\n\n  expect_equal(\n    data_rotate(df),\n    structure(\n      list(\n        `Mazda RX4` = c(21, 6, 160, 110),\n        `Mazda RX4 Wag` = c(21, 6, 160, 110),\n        `Datsun 710` = c(22.8, 4, 108, 93)\n      ),\n      class = \"data.frame\",\n      row.names = c(\"mpg\", \"cyl\", \"disp\", \"hp\")\n    )\n  )\n\n  expect_equal(\n    data_rotate(df, rownames = \"property\"),\n    structure(\n      list(\n        property = c(\"mpg\", \"cyl\", \"disp\", \"hp\"),\n        `Mazda RX4` = c(21, 6, 160, 110),\n        `Mazda RX4 Wag` = c(21, 6, 160, 110),\n        `Datsun 710` = c(22.8, 4, 108, 93)\n      ),\n      class = \"data.frame\",\n      row.names = c(NA, 4L)\n    )\n  )\n\n  expect_equal(\n    data_rotate(df, colnames = TRUE),\n    structure(\n      list(\n        `21` = c(6, 160, 110),\n        `21` = c(6, 160, 110),\n        `22.8` = c(4, 108, 93)\n      ),\n      class = \"data.frame\",\n      row.names = c(\"cyl\", \"disp\", \"hp\")\n    )\n  )\n\n  expect_equal(\n    data_rotate(df, rownames = \"property\", colnames = TRUE),\n    structure(\n      list(\n        property = c(\"cyl\", \"disp\", \"hp\"),\n        `21` = c(6, 160, 110),\n        `21` = c(6, 160, 110),\n        `22.8` = c(4, 108, 93)\n      ),\n      class = \"data.frame\",\n      row.names = c(NA, 3L)\n    )\n  )\n})\n\ntest_that(\"data_rotate, arg 'colnames' works\", {\n  df <- mtcars[1:3, 1:4]\n  df <- rownames_as_column(df)\n\n  expected <- data.frame(\n    `Mazda RX4` = c(21, 6, 160, 110),\n    `Mazda RX4 Wag` = c(21, 6, 160, 110),\n    `Datsun 710` = c(22.8, 4, 108, 93),\n    check.names = FALSE\n  )\n  row.names(expected) <- c(\"mpg\", \"cyl\", \"disp\", \"hp\")\n\n  expect_identical(\n    data_rotate(df, colnames = \"rowname\"),\n    expected\n  )\n})\n\ntest_that(\"data_rotate warns if mixed types of data\", {\n  df <- mtcars[1:3, 1:4]\n  df <- rownames_as_column(df)\n\n  expect_warning(\n    
data_rotate(df),\n    \"mixed types of data\"\n  )\n\n  df$rowname <- factor(df$rowname)\n  expect_warning(\n    data_rotate(df),\n    \"mixed types of data\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_seek.R",
    "content": "test_that(\"data_seek - simple use case\", {\n  data(iris)\n  out <- data_seek(iris, \"Length\")\n  expect_identical(out$index, c(1L, 3L))\n  expect_identical(out$labels, c(\"Sepal.Length\", \"Petal.Length\"))\n})\n\ntest_that(\"data_seek - seek label attribute\", {\n  data(efc)\n  out <- data_seek(efc, \"dependency\")\n  expect_identical(out$index, which(colnames(efc) == out$column))\n  expect_identical(out$labels, \"elder's dependency\")\n})\n\ntest_that(\"data_seek - seek label attribute\", {\n  data(efc)\n  out <- data_seek(efc, \"female\")\n  expect_identical(nrow(out), 0L)\n  out <- data_seek(efc, \"female\", seek = \"all\")\n  expect_identical(out$index, which(colnames(efc) == out$column))\n  expect_identical(out$labels, \"elder's gender\")\n})\n\ntest_that(\"data_seek - fuzzy match\", {\n  data(iris)\n  out <- data_seek(iris, \"Lenght\")\n  expect_identical(nrow(out), 0L)\n  out <- data_seek(iris, \"Lenght\", fuzzy = TRUE)\n  expect_identical(out$index, which(colnames(iris) %in% out$column))\n  expect_identical(out$labels, c(\"Sepal.Length\", \"Petal.Length\"))\n})\n\ntest_that(\"data_seek - fuzzy match, value labels\", {\n  data(efc)\n  out <- data_seek(efc, \"femlae\", seek = \"all\", fuzzy = TRUE)\n  expect_identical(nrow(out), 1L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, \"elder's gender\")\n})\n\ntest_that(\"data_seek - multiple pattern\", {\n  data(efc)\n  out <- data_seek(efc, c(\"e16\", \"e42\"))\n  expect_identical(nrow(out), 2L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, c(\"elder's gender\", \"elder's dependency\"))\n  # only one match, typo\n  out <- data_seek(efc, c(\"femlae\", \"dependency\"))\n  expect_identical(nrow(out), 1L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, \"elder's dependency\")\n  # only one match, not searching in value labels\n  out <- 
data_seek(efc, c(\"female\", \"dependency\"))\n  expect_identical(nrow(out), 1L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, \"elder's dependency\")\n  # two matches\n  out <- data_seek(efc, c(\"female\", \"dependency\"), seek = \"all\")\n  expect_identical(nrow(out), 2L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, c(\"elder's gender\", \"elder's dependency\"))\n  # only one match, typo\n  out <- data_seek(efc, c(\"femlae\", \"dependency\"), seek = \"all\")\n  expect_identical(nrow(out), 1L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, \"elder's dependency\")\n  # two matches, despite typo\n  out <- data_seek(efc, c(\"femlae\", \"dependency\"), seek = \"all\", fuzzy = TRUE)\n  expect_identical(nrow(out), 2L)\n  expect_identical(out$index, which(colnames(efc) %in% out$column))\n  expect_identical(out$labels, c(\"elder's gender\", \"elder's dependency\"))\n})\n\ntest_that(\"data_seek - valid input\", {\n  expect_error(\n    data_seek(rnorm(10), \"Length\"),\n    regex = \"`data` must be a data frame.\"\n  )\n  expect_error(\n    data_seek(iris, \"Length\", seek = \"somewhere\"),\n    regex = \"`seek` must be\"\n  )\n})\n\ntest_that(\"data_seek - print\", {\n  expect_snapshot(data_seek(iris, \"Length\"))\n  expect_snapshot(data_seek(iris, \"abc\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-data_select.R",
    "content": "# input check ---------------------\n\ntest_that(\"data_select checks for data frame\", {\n  expect_error(data_select(NULL), regexp = \"provided\")\n  x <- list(a = 1:2, b = letters[1:3])\n  expect_error(data_select(x), regexp = \"coerced\")\n})\n\n\n# select helpers ---------------------\n\ntest_that(\"data_select works with select helpers\", {\n  expect_identical(\n    data_select(iris, starts_with(\"Sepal\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  expect_identical(\n    data_select(iris, ends_with(\"Width\")),\n    iris[c(\"Sepal.Width\", \"Petal.Width\")]\n  )\n\n  expect_identical(\n    data_select(iris, regex(\"\\\\.\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")]\n  )\n\n  expect_identical(\n    data_select(iris, contains(\"Wid\")),\n    iris[c(\"Sepal.Width\", \"Petal.Width\")]\n  )\n})\n\n\n# select helpers, negation ---------------------\n\ntest_that(\"data_select works with negation of select helpers\", {\n  expect_identical(\n    data_select(iris, -starts_with(\"Sepal\")),\n    iris[c(\"Petal.Length\", \"Petal.Width\", \"Species\")]\n  )\n\n  expect_identical(\n    data_select(iris, -ends_with(\"Width\")),\n    iris[c(\"Sepal.Length\", \"Petal.Length\", \"Species\")]\n  )\n})\n\n\n# select-nse with function  ---------------------\n\ntest_that(\"data_select works with select-functions\", {\n  expect_identical(\n    data_select(iris, is.numeric()),\n    iris[sapply(iris, is.numeric)]\n  )\n\n  expect_identical(\n    data_select(iris, is.numeric),\n    iris[sapply(iris, is.numeric)]\n  )\n\n  expect_identical(\n    data_select(iris, is.factor()),\n    iris[sapply(iris, is.factor)]\n  )\n\n  expect_identical(\n    data_select(iris, is.factor),\n    iris[sapply(iris, is.factor)]\n  )\n\n  expect_warning(expect_null(data_select(iris, is.logical())))\n})\n\n\n# select-nse with user-function  ---------------------\ntestfun <- function(i) {\n  is.numeric(i) && mean(i, na.rm = TRUE) > 
3.5\n}\ntest_that(\"data_select works with user-defined select-functions\", {\n  expect_identical(data_select(iris, testfun), iris[sapply(iris, testfun)])\n  expect_identical(data_select(iris, -testfun), iris[!sapply(iris, testfun)])\n\n  testfun2 <- function(i) {\n    is.numeric(i) && mean(i, na.rm = TRUE) < 5\n  }\n  expect_identical(\n    data_select(iris, select = testfun, exclude = testfun2),\n    iris[\"Sepal.Length\"]\n  )\n  expect_identical(\n    data_select(iris, select = testfun, exclude = -testfun2),\n    iris[\"Petal.Length\"]\n  )\n})\n\n\n# select-nse with negation of functions  ---------------------\n\ntest_that(\"data_select works with negated select-functions\", {\n  expect_identical(\n    data_select(iris, -is.numeric()),\n    iris[sapply(iris, function(i) !is.numeric(i))] # nolint\n  )\n\n  expect_identical(\n    data_select(iris, -is.numeric),\n    iris[sapply(iris, function(i) !is.numeric(i))] # nolint\n  )\n\n  expect_identical(\n    data_select(iris, -is.factor()),\n    iris[sapply(iris, function(i) !is.factor(i))] # nolint\n  )\n\n  expect_identical(\n    data_select(iris, -is.factor),\n    iris[sapply(iris, function(i) !is.factor(i))] # nolint\n  )\n\n  expect_identical(data_select(iris, -is.logical), iris)\n})\n\n\n# select-nse with ranges  ---------------------\n\ntest_that(\"data_select works with ranges\", {\n  expect_identical(\n    data_select(iris, 2:3),\n    iris[2:3]\n  )\n\n  expect_identical(\n    data_select(iris, Sepal.Width:Petal.Length),\n    iris[2:3]\n  )\n})\n\n\n# select-nse with negated ranges  ---------------------\n\ntest_that(\"data_select works with negated ranges\", {\n  expect_identical(\n    data_select(iris, -(1:2)),\n    iris[c(3, 4, 5)]\n  )\n\n  expect_identical(\n    data_select(iris, -1:-2),\n    iris[c(3, 4, 5)]\n  )\n\n  expect_identical(\n    data_select(iris, exclude = -1:-2),\n    iris[1:2]\n  )\n\n  expect_identical(\n    data_select(iris, exclude = 2:3),\n    iris[c(1, 4, 5)]\n  )\n\n  
expect_error(\n    data_select(iris, -Sepal.Width:Petal.Length),\n    \"can't mix negative and positive\"\n  )\n  expect_identical(\n    data_select(iris, -(Sepal.Width:Petal.Length)),\n    iris[c(1, 4, 5)]\n  )\n})\n\n\n# select-nse with formulas  ---------------------\n\ntest_that(\"data_select works with formulas\", {\n  expect_identical(\n    data_select(iris, ~ Sepal.Width + Petal.Length),\n    iris[2:3]\n  )\n\n  expect_identical(\n    data_select(iris, exclude = ~ Sepal.Width + Petal.Length),\n    iris[c(1, 4, 5)]\n  )\n})\n\n\n# select-nse, other cases ---------------------\n\ntest_that(\"data_select works, other cases\", {\n  expect_identical(data_select(iris), iris)\n\n  expect_identical(\n    data_select(iris, c(\"Petal.Width\", \"Sepal.Length\")),\n    iris[c(\"Petal.Width\", \"Sepal.Length\")]\n  )\n\n  expect_identical(\n    data_select(iris, -c(\"Petal.Width\", \"Sepal.Length\")),\n    iris[setdiff(colnames(iris), c(\"Petal.Width\", \"Sepal.Length\"))]\n  )\n\n  expect_identical(\n    data_select(iris, -Petal.Width),\n    iris[setdiff(colnames(iris), \"Petal.Width\")]\n  )\n\n  expect_identical(\n    data_select(mtcars, c(\"am\", \"gear\", \"cyl\")),\n    mtcars[c(\"am\", \"gear\", \"cyl\")]\n  )\n\n  expect_identical(\n    data_select(mtcars, c(\"vam\", \"gear\", \"cyl\")),\n    mtcars[c(\"gear\", \"cyl\")]\n  )\n\n  expect_warning(expect_null(data_select(mtcars, ends_with(\"abc\"))))\n\n  expect_identical(\n    data_select(mtcars, regex(\"rb$\")),\n    mtcars[\"carb\"]\n  )\n\n  expect_identical(\n    data_select(mtcars, regex(\"^c\")),\n    mtcars[c(\"cyl\", \"carb\")]\n  )\n\n  expect_warning(expect_null(data_select(mtcars, \"^c\")))\n\n  expect_identical(\n    data_select(mtcars, regex(\"^C\"), ignore_case = TRUE),\n    mtcars[c(\"cyl\", \"carb\")]\n  )\n})\n\n\n# select-nse works when called from other function  ---------------------\n\ntest_that(\"data_select from other functions\", {\n  test_fun1 <- function(data, i) {\n    data_select(data, 
select = i)\n  }\n  expect_identical(\n    test_fun1(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  expect_identical(\n    test_fun1(iris, starts_with(\"Sep\")),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  test_fun1a <- function(data, i) {\n    data_select(data, select = i, regex = TRUE)\n  }\n  expect_identical(\n    test_fun1a(iris, \"Sep\"),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  test_fun1b <- function(data, i) {\n    data_select(data, select = i, regex = TRUE)\n  }\n  expect_identical(\n    test_fun1b(iris, \"Width$\"),\n    iris[c(\"Sepal.Width\", \"Petal.Width\")]\n  )\n\n  test_fun1c <- function(data, i) {\n    data_select(data, select = -i)\n  }\n  expect_identical(\n    test_fun1c(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    iris[c(\"Petal.Length\", \"Petal.Width\", \"Species\")]\n  )\n\n  test_fun2 <- function(data) {\n    data_select(data, select = starts_with(\"Sep\"))\n  }\n  expect_identical(\n    test_fun2(iris),\n    iris[c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  test_fun3 <- function(data) {\n    i <- \"Sep\"\n    data_select(data, select = starts_with(i))\n  }\n  expect_identical(\n    test_fun3(iris),\n    iris[, c(\"Sepal.Length\", \"Sepal.Width\")]\n  )\n\n  test_top <- function(x) {\n    testfun1 <- function(i) {\n      is.numeric(i) && mean(i, na.rm = TRUE) > 3.5\n    }\n    testfun2 <- function(i) {\n      is.numeric(i) && mean(i, na.rm = TRUE) < 5\n    }\n    data_select(x, select = testfun, exclude = -testfun2)\n  }\n  expect_identical(test_top(iris), iris[\"Petal.Length\"])\n})\n\n\n# preserve attributes --------------------------\n\ntest_that(\"data_select preserves attributes\", {\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(Sepal.Length ~ Species, data = iris)\n  out <- parameters::parameters(m)\n  a1 <- attributes(out)\n\n  out2 <- data_select(out, 1:3)\n  a2 <- attributes(out2)\n\n  expect_identical(sort(names(a1)), 
sort(names(a2)))\n})\n\n# Select helpers work in functions and loops\n\ntest_that(\"select helpers work in functions and loops\", {\n  foo <- function(data, i) {\n    extract_column_names(data, select = starts_with(i))\n  }\n  expect_identical(\n    foo(iris, \"Sep\"),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  for (i in \"Sepal\") {\n    x <- extract_column_names(iris, select = starts_with(i))\n  }\n  expect_identical(\n    x,\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  for (i in \"Length\") {\n    x <- extract_column_names(iris, select = ends_with(i))\n  }\n  expect_identical(\n    x,\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n})\n\ntest_that(\"select helpers work in functions and loops even if there's an object with the same name in the environment above\", {\n  i <- \"Petal\"\n  foo <- function(data, i) {\n    extract_column_names(data, select = starts_with(i))\n  }\n  expect_identical(\n    foo(iris, \"Sep\"),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  for (i in \"Sepal\") {\n    x <- extract_column_names(iris, select = starts_with(i))\n  }\n  expect_identical(\n    x,\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  i <- \"Width\"\n\n  for (i in \"Length\") {\n    x <- extract_column_names(iris, select = ends_with(i))\n  }\n  expect_identical(\n    x,\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n})\n\ntest_that(\"old solution still works\", {\n  foo <- function(data) {\n    i <- \"Sep\"\n    extract_column_names(data, select = i, regex = TRUE)\n  }\n  expect_identical(\n    foo(iris),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n})\n\ntest_that(\"data_select renames variables on the fly\", {\n  data(mtcars)\n  expect_named(\n    data_select(mtcars, c(new = \"mpg\", old = \"cyl\", hoho = \"wt\")),\n    c(\"new\", \"old\", \"hoho\")\n  )\n  expect_named(\n    data_select(mtcars, c(new = \"mpg\", \"cyl\", hoho = \"wt\")),\n    c(\"new\", \"cyl\", \"hoho\")\n  )\n  expect_named(\n    data_select(mtcars, c(\"mpg\", 
\"cyl\", \"wt\")),\n    c(\"mpg\", \"cyl\", \"wt\")\n  )\n  # don't fail for non-existing columns\n  expect_named(\n    data_select(mtcars, c(new = \"mpg\", \"cyl\", hoho = \"wt\", test = \"grea\")),\n    c(\"new\", \"cyl\", \"hoho\")\n  )\n  # check that excluded variables don't cause troubles\n  expect_named(\n    data_select(mtcars, c(new = \"mpg\", \"cyl\", hoho = \"wt\"), exclude = \"wt\"),\n    c(\"new\", \"cyl\")\n  )\n  # error when names are not unique\n  expect_error(\n    data_select(mtcars, c(new = \"mpg\", old = \"cyl\", new = \"wt\")), # nolint\n    regex = \"Following names are duplicated\"\n  )\n  expect_error(\n    data_select(mtcars, c(new = \"mpg\", \"cyl\", cyl = \"wt\")), # nolint\n    regex = \"Following names are duplicated\"\n  )\n  # when new name is used in exclude, it should be ignored\n  expect_named(\n    data_select(mtcars, c(drat = \"mpg\"), exclude = \"drat\"),\n    \"drat\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_separate.R",
    "content": "test_that(\"data_separate: simple use case\", {\n  # simple case\n  d_sep <- data.frame(\n    x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n    stringsAsFactors = FALSE\n  )\n\n  expect_error(data_separate(d_sep), regex = \"Either\")\n\n  # basic\n  expect_silent(data_separate(d_sep, guess_columns = \"mode\", verbose = FALSE))\n  expect_silent({\n    out <- data_separate(d_sep, guess_columns = \"mode\")\n  })\n  expect_identical(colnames(out), c(\"x_1\", \"x_2\", \"x_3\"))\n  expect_identical(out$x_1, c(\"1\", \"2\", \"3\"))\n  expect_identical(out$x_2, c(\"a\", \"b\", \"c\"))\n\n  # manual separator char\n  out2 <- data_separate(\n    d_sep,\n    separator = \"\\\\.\",\n    guess_columns = \"mode\",\n    verbose = FALSE\n  )\n  expect_identical(out, out2)\n\n  # non-existing separator char\n  expect_message(\n    data_separate(d_sep, separator = \"_\", guess_columns = \"mode\"),\n    regex = \"Separator probably not found\"\n  )\n\n  # column names\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A1\", \"B2\", \"C3\"),\n    verbose = FALSE\n  )\n  expect_identical(colnames(out), c(\"A1\", \"B2\", \"C3\"))\n  expect_identical(out$A1, c(\"1\", \"2\", \"3\"))\n  expect_identical(out$B2, c(\"a\", \"b\", \"c\"))\n\n  out <- data_separate(d_sep, new_columns = letters[1:3], append = TRUE)\n  expect_equal(\n    out,\n    data.frame(\n      x = c(\"1.a.6\", \"2.b.7\", \"3.c.8\"),\n      a = c(\"1\", \"2\", \"3\"),\n      b = c(\"a\", \"b\", \"c\"),\n      c = c(\"6\", \"7\", \"8\"),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"data_separate: convert between data_unite and data_separate\", {\n  d_unite <- data.frame(\n    x = as.character(c(NA, 1:3)),\n    y = c(letters[1:3], NA_character_),\n    z = as.character(6:9),\n    m = c(\"X\", NA_character_, \"Y\", \"Z\"),\n    n = c(\"NATION\", \"COUNTRY\", \"NATION\", NA_character_),\n    stringsAsFactors = FALSE\n  )\n\n  out1 <- data_unite(d_unite, new_column = 
\"test\")\n  d_sep <- data_separate(\n    out1,\n    new_columns = c(\"x\", \"y\", \"z\", \"m\", \"n\"),\n    separator = \"_\"\n  )\n\n  expect_identical(d_unite, d_sep)\n})\n\n\ntest_that(\"data_separate: different number of values\", {\n  d_sep <- data.frame(\n    x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n    stringsAsFactors = FALSE\n  )\n\n  # basic use-case\n  expect_silent(data_separate(d_sep, guess_columns = \"mode\", verbose = FALSE))\n  expect_message(\n    expect_message(\n      expect_message(\n        data_separate(d_sep, guess_columns = \"mode\"),\n        regex = \"3 columns\"\n      ),\n      regex = \"have been dropped\"\n    ),\n    regex = \"filled with `NA`\"\n  )\n  out <- data_separate(d_sep, guess_columns = \"mode\", verbose = FALSE)\n  expect_identical(colnames(out), c(\"x_1\", \"x_2\", \"x_3\"))\n  expect_identical(out$x_1, c(\"1\", \"2\", \"3\", \"5\"))\n  expect_identical(out$x_2, c(\"a\", \"b\", \"c\", \"j\"))\n  expect_identical(out$x_3, c(\"6\", \"7\", \"8\", NA))\n\n  # fill missings left\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    fill = \"left\",\n    verbose = FALSE\n  )\n  expect_identical(colnames(out), c(\"x_1\", \"x_2\", \"x_3\"))\n  expect_identical(out$x_1, c(\"1\", \"2\", \"3\", NA))\n  expect_identical(out$x_2, c(\"a\", \"b\", \"c\", \"5\"))\n  expect_identical(out$x_3, c(\"6\", \"7\", \"8\", \"j\"))\n\n  # merge extra right\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    extra = \"merge_right\",\n    verbose = FALSE\n  )\n  expect_identical(colnames(out), c(\"x_1\", \"x_2\", \"x_3\"))\n  expect_identical(out$x_1, c(\"1\", \"2\", \"3\", \"5\"))\n  expect_identical(out$x_2, c(\"a\", \"b\", \"c\", \"j\"))\n  expect_identical(out$x_3, c(\"6\", \"7 d\", \"8\", NA))\n\n  # max columns\n  out <- data_separate(d_sep, guess_columns = \"max\", verbose = FALSE)\n  expect_equal(\n    out,\n    data.frame(\n      x_1 = c(\"1\", \"2\", \"3\", \"5\"),\n      x_2 = c(\"a\", 
\"b\", \"c\", \"j\"),\n      x_3 = c(\"6\", \"7\", \"8\", NA),\n      x_4 = c(NA, \"d\", NA, NA),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n\n  # min columns\n  out <- data_separate(d_sep, guess_columns = \"min\", verbose = FALSE)\n  expect_equal(\n    out,\n    data.frame(\n      x_1 = c(\"1\", \"2\", \"3\", \"5\"),\n      x_2 = c(\"a\", \"b\", \"c\", \"j\"),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"min\",\n    extra = \"merge_left\",\n    verbose = FALSE\n  )\n  expect_equal(\n    out,\n    data.frame(\n      x_1 = c(\"1 a\", \"2 b 7\", \"3 c\", \"5\"),\n      x_2 = c(\"6\", \"d\", \"8\", \"j\"),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"max\",\n    fill = \"left\",\n    verbose = FALSE\n  )\n  expect_equal(\n    out,\n    data.frame(\n      x_1 = c(NA, \"2\", NA, NA),\n      x_2 = c(\"1\", \"b\", \"3\", NA),\n      x_3 = c(\"a\", \"7\", \"c\", \"5\"),\n      x_4 = c(\"6\", \"d\", \"8\", \"j\"),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"data_separate: multiple columns\", {\n  d_sep <- data.frame(\n    x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n    y = c(\"m.n.99\", \"77.f.g\", \"44.9\", NA),\n    stringsAsFactors = FALSE\n  )\n\n  # select works\n  out <- data_separate(\n    d_sep,\n    select = \"x\",\n    guess_columns = \"mode\",\n    verbose = FALSE\n  )\n  expect_identical(colnames(out), c(\"y\", \"x_1\", \"x_2\", \"x_3\"))\n  expect_identical(out$x_1, c(\"1\", \"2\", \"3\", \"5\"))\n  expect_identical(out$x_2, c(\"a\", \"b\", \"c\", \"j\"))\n  expect_identical(out$x_3, c(\"6\", \"7\", \"8\", NA))\n\n  out <- data_separate(d_sep, guess_columns = \"mode\", verbose = FALSE)\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    extra = \"merge_right\",\n    
verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A\", \"B\", \"C\"),\n    extra = \"merge_right\",\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A\", \"B\", \"C\"),\n    extra = \"merge_right\",\n    append = TRUE,\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    extra = \"drop_left\",\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A\", \"B\", \"C\"),\n    fill = \"value_right\",\n    extra = \"merge_right\",\n    append = TRUE,\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A\", \"B\", \"C\"),\n    fill = \"value_right\",\n    extra = \"merge_right\",\n    merge_multiple = TRUE,\n    append = TRUE,\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    new_columns = c(\"A\", \"B\", \"C\"),\n    merge_multiple = TRUE,\n    append = TRUE,\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    fill = \"value_left\",\n    verbose = FALSE\n  )\n  expect_snapshot(out)\n})\n\n\ntest_that(\"data_separate: multiple columns, different lengths\", {\n  d_sep <- data.frame(\n    x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n    y = c(\"m.n.99.22\", \"77.f.g.34\", \"44.9\", NA),\n    stringsAsFactors = FALSE\n  )\n\n  # separate column names\n  out <- data_separate(\n    d_sep,\n    select = c(\"x\", \"y\"),\n    new_columns = list(x = c(\"A\", \"B\", \"C\"), y = c(\"EE\", \"FF\", \"GG\")),\n    verbose = FALSE\n  )\n  expect_named(out, c(\"A\", \"B\", \"C\", \"EE\", \"FF\", \"GG\"))\n  expect_snapshot(out)\n\n  out <- data_separate(\n    d_sep,\n    select = c(\"x\", \"y\"),\n    new_columns = list(x = c(\"A\", \"B\", \"C\"), y = c(\"EE\", 
\"FF\", \"GG\", \"HH\")),\n    verbose = FALSE\n  )\n  expect_named(out, c(\"A\", \"B\", \"C\", \"EE\", \"FF\", \"GG\", \"HH\"))\n  expect_snapshot(out)\n})\n\n\ntest_that(\"data_separate: numeric separator\", {\n  d_sep <- data.frame(\n    x = c(\"Thisisalongstring\", \"Doeshe1losteverything\", \"Wereme2longornot\"),\n    stringsAsFactors = FALSE\n  )\n\n  expect_silent({\n    out <- data_separate(\n      d_sep,\n      guess_columns = \"mode\",\n      separator = c(5, 7, 8, 12),\n      verbose = TRUE\n    )\n  })\n  expect_equal(\n    out,\n    data.frame(\n      x_1 = c(\"This\", \"Does\", \"Were\"),\n      x_2 = c(\"is\", \"he\", \"me\"),\n      x_3 = c(\"a\", \"1\", \"2\"),\n      x_4 = c(\"long\", \"lost\", \"long\"),\n      x_5 = c(\"string\", \"everything\", \"ornot\"),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n\n  d_sep <- data.frame(\n    x = c(\"Thisisalongstring\", \"Doeshe1losteverything\"),\n    y = c(\"Wereme2longornot\", NA),\n    stringsAsFactors = FALSE\n  )\n  expect_silent({\n    out <- data_separate(\n      d_sep,\n      separator = c(5, 7, 8, 12),\n      new_columns = LETTERS[1:5]\n    )\n  })\n  expect_equal(\n    out,\n    data.frame(\n      A = c(\"This\", \"Does\"),\n      B = c(\"is\", \"he\"),\n      C = c(\"a\", \"1\"),\n      D = c(\"long\", \"lost\"),\n      E = c(\"string\", \"everything\"),\n      A.1 = c(\"Were\", NA),\n      B.1 = c(\"me\", NA),\n      C.1 = c(\"2\", NA),\n      D.1 = c(\"long\", NA),\n      E.1 = c(\"ornot\", NA),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE\n  )\n\n  expect_error(\n    data_separate(\n      d_sep,\n      separator = c(5, 7, 8, 12),\n      new_columns = LETTERS[1:6]\n    ),\n    regex = \"went wrong\"\n  )\n})\n\n\ntest_that(\"data_separate: fail if invalid column selected\", {\n  d_sep <- data.frame(\n    x = c(\"1.a.6\", \"2.b.7.d\", \"3.c.8\", \"5.j\"),\n    y = c(\"m.n.99\", \"77.f.g\", \"44.9\", NA),\n    stringsAsFactors = FALSE\n  )\n  
expect_warning(\n    expect_message(\n      data_separate(d_sep, guess_columns = \"mode\", select = \"z\"),\n      regex = \"not found\"\n    ),\n    regex = \"misspelled?\"\n  )\n  expect_identical(\n    data_separate(d_sep, guess_columns = \"mode\", select = \"z\", verbose = FALSE),\n    d_sep\n  )\n  expect_snapshot(data_separate(d_sep, guess_columns = \"mode\", select = NULL))\n})\n\n\ntest_that(\"data_separate: numeric column\", {\n  d_sep <- data.frame(\n    x = c(154353523, 535543532, 12342422, 15454334535),\n    y = c(\"m.n.99\", \"77.f.g\", \"44.9\", NA),\n    stringsAsFactors = FALSE\n  )\n  expect_message(\n    data_separate(d_sep, guess_columns = \"mode\", select = \"x\"),\n    regex = \"Separator probably\"\n  )\n  out <- data_separate(\n    d_sep,\n    guess_columns = \"mode\",\n    select = \"x\",\n    separator = c(3, 6, 9)\n  )\n  expect_snapshot(out)\n})\n"
  },
  {
    "path": "tests/testthat/test-data_shift.R",
    "content": "# numeric\ntest_that(\"slide\", {\n  x <- c(10, 11, 12)\n  expect_identical(slide(x), c(0, 1, 2))\n\n  x <- c(10, 11, 12)\n  expect_identical(slide(x, lowest = 10), x)\n\n  x <- c(10, 11, 12)\n  expect_identical(slide(x, lowest = 1), c(1, 2, 3))\n\n  x <- c(10, 11, NA, 12)\n  expect_identical(slide(x, lowest = 1), c(1, 2, NA, 3))\n})\n\n# factor\ntest_that(\"slide\", {\n  data(efc)\n  expect_message(expect_identical(slide(efc$e42dep), efc$e42dep))\n})\n\n# data frame\ntest_that(\"slide\", {\n  data(iris)\n  expect_message(\n    out <- slide(iris), # nolint\n    \"Shifting non-numeric variables is not possible\"\n  )\n  expect_identical(out$Species, iris$Species)\n  expect_identical(range(out$Sepal.Length), c(0, 3.6), tolerance = 1e-2)\n})\n\n# select helpers ------------------------------\ntest_that(\"slide regex\", {\n  expect_identical(\n    slide(mtcars, select = \"pg\", regex = TRUE),\n    slide(mtcars, select = \"mpg\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_summary.R",
    "content": "test_that(\"data_summary, single row summary\", {\n  data(iris)\n  out <- data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\n  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)\n  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)\n})\n\n\ntest_that(\"data_summary, single row summary, string expression\", {\n  data(iris)\n  out <- data_summary(iris, \"MW = mean(Sepal.Width)\", \"SD = sd(Sepal.Width)\")\n  expect_equal(out$MW, mean(iris$Sepal.Width), tolerance = 1e-4)\n  expect_equal(out$SD, sd(iris$Sepal.Width), tolerance = 1e-4)\n})\n\n\ntest_that(\"data_summary, summary for groups\", {\n  data(iris)\n  out <- data_summary(\n    iris,\n    MW = mean(Sepal.Width),\n    SD = sd(Sepal.Width),\n    by = \"Species\"\n  )\n  expect_equal(\n    out$MW,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), mean)$Sepal.Width,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    out$SD,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), sd)$Sepal.Width,\n    tolerance = 1e-4\n  )\n})\n\n\ntest_that(\"data_summary, summary for groups, string expression\", {\n  data(iris)\n  out <- data_summary(\n    iris,\n    \"MW = mean(Sepal.Width)\",\n    \"SD = sd(Sepal.Width)\",\n    by = \"Species\"\n  )\n  expect_equal(\n    out$MW,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), mean)$Sepal.Width,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    out$SD,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), sd)$Sepal.Width,\n    tolerance = 1e-4\n  )\n})\n\n\ntest_that(\"data_summary, grouped data frames\", {\n  data(iris)\n  d <- data_group(iris, \"Species\")\n  out <- data_summary(d, MW = mean(Sepal.Width), SD = sd(Sepal.Width))\n  expect_equal(\n    out$MW,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), mean)$Sepal.Width,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    out$SD,\n    aggregate(iris[\"Sepal.Width\"], list(iris$Species), sd)$Sepal.Width,\n    tolerance = 1e-4\n  )\n  # \"by\" 
overrides groups\n  data(mtcars)\n  d <- data_group(mtcars, \"gear\")\n  out <- data_summary(d, MW = mean(mpg), SD = sd(mpg), by = \"am\")\n  expect_identical(\n    out$MW,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am), mean)$mpg\n  )\n})\n\n\ntest_that(\"data_summary, summary for multiple groups\", {\n  data(mtcars)\n  out <- data_summary(\n    mtcars,\n    MW = mean(mpg),\n    SD = sd(mpg),\n    by = c(\"am\", \"gear\")\n  )\n  expect_equal(\n    out$MW,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), mean)$mpg,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    out$SD,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), sd)$mpg,\n    tolerance = 1e-4\n  )\n  x <- data_group(mtcars, c(\"am\", \"gear\"))\n  out <- data_summary(x, MW = mean(mpg), SD = sd(mpg))\n  expect_equal(\n    out$MW,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), mean)$mpg,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    out$SD,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), sd)$mpg,\n    tolerance = 1e-4\n  )\n})\n\n\ntest_that(\"data_summary, errors\", {\n  data(iris)\n  data(mtcars)\n  # \"by\" must be character\n  expect_error(\n    data_summary(iris, MW = mean(Sepal.Width), SD = sd(Sepal.Width), by = 5),\n    regex = \"Argument `by` must be a character string\"\n  )\n  # \"by\" must be in data\n  expect_error(\n    data_summary(\n      iris,\n      MW = mean(Sepal.Width),\n      SD = sd(Sepal.Width),\n      by = \"Speceis\"\n    ),\n    regex = \"Variable \\\"Speceis\\\" not\"\n  )\n  # by for multiple variables\n  expect_error(\n    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c(\"bam\", \"gear\")),\n    regex = \"Variable \\\"bam\\\" not\"\n  )\n  expect_error(\n    data_summary(mtcars, MW = mean(mpg), SD = sd(mpg), by = c(\"bam\", \"geas\")),\n    regex = \"Did you mean one of \\\"am\\\" or \\\"gear\\\"?\"\n  )\n  # not a data frame\n  expect_error(\n    data_summary(\n      iris$Sepal.Width,\n      MW = 
mean(Sepal.Width),\n      SD = sd(Sepal.Width)\n    ),\n    regex = \"only works for\"\n  )\n  # no expressions\n  expect_error(\n    data_summary(iris, by = \"Species\"),\n    regex = \"No expressions for calculating\"\n  )\n  # wrong expression\n  expect_error(\n    data_summary(mtcars, mw = mesn(mpg), by = \"am\"),\n    regex = \"There was an error\"\n  )\n  # wrong variable name\n  expect_error(\n    data_summary(mtcars, n = max(mpeg)),\n    regex = \"There was an error\"\n  )\n  # expression returns more than one value\n  expect_error(\n    data_summary(\n      mtcars,\n      n = unique(mpg),\n      j = c(min(am), max(am)),\n      by = c(\"am\", \"gear\")\n    ),\n    regex = \"Each expression must return\"\n  )\n})\n\n\ntest_that(\"data_summary, values_at\", {\n  data(mtcars)\n  out <- data_summary(\n    mtcars,\n    pos1 = mpg[1],\n    pos_end = mpg[length(mpg)],\n    by = c(\"am\", \"gear\")\n  )\n  # same as:\n  # dplyr::summarise(mtcars, pos1 = dplyr::first(mpg), pos_end = dplyr::last(mpg), .by = c(\"am\", \"gear\"))\n  expect_equal(out$pos1, c(21.4, 24.4, 21, 26), tolerance = 1e-3)\n  expect_equal(out$pos_end, c(19.2, 17.8, 21.4, 15), tolerance = 1e-3)\n})\n\n\ntest_that(\"data_summary, print\", {\n  data(mtcars)\n  out <- data_summary(\n    mtcars,\n    MW = mean(mpg),\n    SD = sd(mpg),\n    by = c(\"am\", \"gear\")\n  )\n  expect_snapshot(print(out))\n})\n\n\ntest_that(\"data_summary, with NA\", {\n  data(efc, package = \"datawizard\")\n  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = \"c172code\")\n  expect_snapshot(print(out))\n  out <- data_summary(\n    efc,\n    MW = mean(c12hour, na.rm = TRUE),\n    by = \"c172code\",\n    remove_na = TRUE\n  )\n  expect_snapshot(print(out))\n  # sorting for multiple groups\n  out <- data_summary(\n    efc,\n    MW = mean(c12hour, na.rm = TRUE),\n    by = c(\"e42dep\", \"c172code\")\n  )\n  expect_snapshot(print(out))\n})\n\n\ntest_that(\"data_summary, inside functions\", {\n  foo1 <- 
function(x, ...) {\n    datawizard::data_summary(x, ..., by = \"Species\")\n  }\n\n  foo2 <- function(x, by, ...) {\n    datawizard::data_summary(x, ..., by = by)\n  }\n\n  foo3 <- function(x, by) {\n    datawizard::data_summary(x, MW = mean(Sepal.Width), by = by)\n  }\n\n  data(iris)\n  out1 <- foo1(iris, MW = mean(Sepal.Width))\n  out2 <- foo2(iris, by = \"Species\", MW = mean(Sepal.Width))\n  out3 <- foo3(iris, \"Species\")\n  expect_equal(out1$MW, out2$MW, tolerance = 1e-4)\n  expect_equal(out1$MW, out3$MW, tolerance = 1e-4)\n})\n\n\ntest_that(\"data_summary, expression as variable\", {\n  data(mtcars)\n  a <- \"MW = mean(mpg)\"\n  b <- \"SD = sd(mpg)\"\n  out <- data_summary(mtcars, a, by = c(\"am\", \"gear\"))\n  expect_named(out, c(\"am\", \"gear\", \"MW\"))\n  expect_equal(\n    out$MW,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), mean)$mpg,\n    tolerance = 1e-4\n  )\n  expect_error(\n    data_summary(mtcars, a, b, by = c(\"am\", \"gear\")),\n    regex = \"You cannot mix\"\n  )\n  out <- data_summary(mtcars, c(a, b), by = c(\"am\", \"gear\"))\n  expect_named(out, c(\"am\", \"gear\", \"MW\", \"SD\"))\n  expect_equal(\n    out$SD,\n    aggregate(mtcars[\"mpg\"], list(mtcars$am, mtcars$gear), sd)$mpg,\n    tolerance = 1e-4\n  )\n})\n\n\ntest_that(\"data_summary, extra functions\", {\n  data(mtcars)\n  # n()\n  out <- data_summary(mtcars, n = n(), by = c(\"am\", \"gear\"))\n  expect_identical(out$n, c(15L, 4L, 8L, 5L))\n})\n\n\ntest_that(\"data_summary, bayestestR::ci\", {\n  skip_if_not_installed(\"bayestestR\")\n  data(mtcars)\n  out <- data_summary(\n    mtcars,\n    mean_value = mean(mpg),\n    ci = bayestestR::ci(mpg),\n    by = c(\"am\", \"gear\")\n  )\n  expect_named(out, c(\"am\", \"gear\", \"mean_value\", \"CI\", \"CI_low\", \"CI_high\"))\n  expect_snapshot(out)\n  out <- data_summary(\n    mtcars,\n    mw = mean(mpg),\n    test = bayestestR::ci(mpg),\n    yolo = c(mean(mpg), sd(mpg)),\n    by = c(\"am\", \"gear\")\n  )\n  
expect_named(\n    out,\n    c(\"am\", \"gear\", \"mw\", \"CI\", \"CI_low\", \"CI_high\", \"yolo_1\", \"yolo_2\")\n  )\n})\n\ntest_that(\"no warning when variable name and function in global env clash, #583\", {\n  dat <- data.frame(rt = 1:10)\n  expect_silent(data_summary(dat, rt = mean(rt)))\n})\n\n\ntest_that(\"allow multiple columns for expressions\", {\n  set.seed(123)\n  d <- data.frame(\n    x = rnorm(100, 1, 1),\n    y = rnorm(100, 2, 2),\n    groups = rep(1:4, each = 25)\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    quant_y = quantile(y, c(0.25, 0.75)),\n    suffix = c(\"Q1\", \"Q3\")\n  )\n  expect_equal(\n    out$quant_xQ1,\n    0.50615,\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_named(out, c(\"quant_xQ1\", \"quant_xQ3\", \"quant_yQ1\", \"quant_yQ3\"))\n\n  # automatic suffixes\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    quant_y = quantile(y, c(0.1, 0.9)),\n    suffix = NULL\n  )\n  expect_named(out, c(\"quant_x25%\", \"quant_x75%\", \"quant_y10%\", \"quant_y90%\"))\n\n  # use own suffix only for one expression - other expressions are\n  # suffixed with `_1`, `_2`, etc.\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n    mean_x = mean(x),\n    suffix = list(quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\")),\n    by = \"groups\"\n  )\n  expect_named(\n    out,\n    c(\n      \"groups\",\n      \"quant_x25%\",\n      \"quant_x75%\",\n      \"quant_y_Q1\",\n      \"quant_y_Q2\",\n      \"quant_y_Q3\",\n      \"mean_x\"\n    )\n  )\n\n  set.seed(123)\n  d <- data.frame(\n    x = rnorm(100, 1, 1),\n    y = rnorm(100, 2, 2),\n    w = rnorm(100, 3, 0.5),\n    z = rnorm(100, 4, 3),\n    groups = rep(1:4, each = 25)\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    mean_x = mean(x),\n    quant_y = quantile(y, c(0.25, 0.5, 0.75))\n  )\n  expect_equal(\n    out,\n    
data.frame(\n      `quant_x25%` = 0.50615,\n      `quant_x75%` = 1.69182,\n      mean_x = 1.09041,\n      `quant_y25%` = 0.39779,\n      `quant_y50%` = 1.54834,\n      `quant_y75%` = 2.93569\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    mean_x = mean(x),\n    fivenum = fivenum(y)\n  )\n  expect_equal(\n    out,\n    data.frame(\n      `quant_x25%` = 0.50615,\n      `quant_x75%` = 1.69182,\n      mean_x = 1.09041,\n      fivenum_1 = -2.10649,\n      fivenum_2 = 0.36539,\n      fivenum_3 = 1.54834,\n      fivenum_4 = 2.96837,\n      fivenum_5 = 8.48208\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    mean_x = mean(x),\n    quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n    suffix = list(quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n  )\n  expect_equal(\n    out,\n    data.frame(\n      `quant_x25%` = 0.50615,\n      `quant_x75%` = 1.69182,\n      mean_x = 1.09041,\n      quant_y_Q1 = 0.39779,\n      quant_y_Q2 = 1.54834,\n      quant_y_Q3 = 2.93569\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.75)),\n    mean_x = mean(x),\n    quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n    suffix = list(quant_x = c(\"Q1\", \"Q3\"), quant_y = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n  )\n  expect_equal(\n    out,\n    data.frame(\n      quant_xQ1 = 0.50615,\n      quant_xQ3 = 1.69182,\n      mean_x = 1.09041,\n      quant_y_Q1 = 0.39779,\n      quant_y_Q2 = 1.54834,\n      quant_y_Q3 = 2.93569\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  out <- data_summary(\n    d,\n    quant_x = quantile(x, c(0.25, 0.5)),\n    quant_w = quantile(w, c(0.25, 0.5)),\n    quant_y = quantile(y, c(0.25, 0.5)),\n    quant_z = quantile(z, c(0.25, 0.5)),\n    suffix = c(\"_Q1\", \"_Q2\")\n  )\n  expect_equal(\n    out,\n    
data.frame(\n      quant_x_Q1 = 0.50615,\n      quant_x_Q2 = 1.06176,\n      quant_w_Q1 = 2.73435,\n      quant_w_Q2 = 3.01796,\n      quant_y_Q1 = 0.39779,\n      quant_y_Q2 = 1.54834,\n      quant_z_Q1 = 1.81187,\n      quant_z_Q2 = 3.98947\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  # errors ------------------------------------------------------------------\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.75)),\n      mean_x = mean(x),\n      quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n      suffix = list(quant_xy = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n    ),\n    regex = \"Names of `suffix` must match the names\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.75)),\n      mean_x = mean(x),\n      quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n      suffix = list(c(\"Q1\", \"Q3\"), \"mean\", c(\"_Q1\", \"_Q2\", \"_Q3\"))\n    ),\n    regex = \"All elements of `suffix` must have names.\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.75)),\n      mean_x = mean(x),\n      quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n      suffix = c(\"_Q1\", \"_Q2\", \"_Q3\")\n    ),\n    regex = \"Argument `suffix` must have the same length\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.75)),\n      mean_x = mean(x),\n      quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n      suffix = list(quant_x = c(\"_Q1\", \"_Q2\", \"_Q3\"))\n    ),\n    regex = \"Argument `suffix` must have the same length\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.75)),\n      mean_x = mean(x),\n      quant_y = quantile(y, c(0.25, 0.5, 0.75)),\n      suffix = list(quant_x = c(\"Q1\", \"Q3\"), quant_y = c(\"_Q1\", \"_Q2\", \"_Q2\"))\n    ),\n    regex = \"All suffixes for a single expression must be 
unique\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_summary(\n      d,\n      quant_x = quantile(x, c(0.25, 0.5)),\n      quant_w = quantile(w, c(0.25, 0.5)),\n      quant_y = quantile(y, c(0.25, 0.5)),\n      quant_z = quantile(z, c(0.25, 0.5)),\n      suffix = c(\"_Q1\", \"_Q2\", \"_Q3\")\n    ),\n    regex = \"Argument `suffix` must have the same length\",\n    fixed = TRUE\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_tabulate.R",
    "content": "test_that(\"data_tabulate factor\", {\n  data(efc, package = \"datawizard\")\n  x <- data_tabulate(efc$e42dep)\n  expect_identical(\n    as.vector(x$Value),\n    as.vector(sort(unique(\n      addNA(efc$e42dep)\n    )))\n  )\n  expect_identical(x$N, as.vector(table(addNA(efc$e42dep))))\n  expect_identical(\n    x$`Valid %`,\n    as.vector(c(\n      100 * table(efc$e42dep) / sum(!is.na(efc$e42dep)),\n      NA\n    )),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_tabulate numeric\", {\n  data(efc, package = \"datawizard\")\n  x <- data_tabulate(efc$neg_c_7)\n  expect_identical(\n    as.vector(x$Value),\n    as.vector(sort(unique(\n      addNA(efc$neg_c_7)\n    )))\n  )\n  expect_identical(x$N, as.vector(table(addNA(efc$neg_c_7))))\n  expect_identical(\n    x$`Valid %`,\n    as.vector(c(\n      100 * table(efc$neg_c_7) / sum(!is.na(efc$neg_c_7)),\n      NA\n    )),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_tabulate, HTML\", {\n  skip_if_not_installed(\"gt\")\n  data(efc, package = \"datawizard\")\n  expect_s3_class(print_html(data_tabulate(efc$c172code)), \"gt_tbl\")\n  expect_s3_class(print_html(data_tabulate(efc, \"c172code\")), \"gt_tbl\")\n  expect_s3_class(\n    display(data_tabulate(efc, \"c172code\"), format = \"html\"),\n    \"gt_tbl\"\n  )\n})\n\n\ntest_that(\"data_tabulate, tinytable\", {\n  skip_if_not_installed(\"tinytable\")\n  data(efc, package = \"datawizard\")\n  expect_snapshot(display(data_tabulate(efc$c172code), format = \"tt\"))\n  expect_snapshot(display(data_tabulate(efc, \"c172code\"), format = \"tt\"))\n})\n\n\ntest_that(\"data_tabulate, weights\", {\n  skip_if_not_installed(\"knitr\")\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  # vector/factor\n  out1 <- data_tabulate(efc$e42dep, weights = efc$weights)\n  out2 <- data_tabulate(efc$e42dep)\n  expect_equal(out1$N, c(3, 4, 26, 67, 5), 
ignore_attr = TRUE)\n  expect_equal(out2$N, c(2L, 4L, 28L, 63L, 3L), ignore_attr = TRUE)\n  expect_equal(\n    out1$N,\n    round(xtabs(efc$weights ~ efc$e42dep, addNA = TRUE)),\n    ignore_attr = TRUE\n  )\n  # data frames\n  out <- data_tabulate(efc, c(\"e42dep\", \"e16sex\"), weights = efc$weights)\n  expect_equal(out[[1]]$N, out1$N, ignore_attr = TRUE)\n  # mismatch of lengths\n  w <- c(efc$weights, 1)\n  expect_error(\n    data_tabulate(efc$e42dep, weights = w),\n    regex = \"Length of `weights`\"\n  )\n  # correct table footer\n  expect_snapshot(print(data_tabulate(efc$e42dep, weights = efc$weights)))\n  expect_snapshot(print_md(data_tabulate(efc$e42dep, weights = efc$weights)))\n  expect_snapshot(display(data_tabulate(efc$e42dep, weights = efc$weights)))\n  # correct table caption\n  expect_snapshot(print(data_tabulate(\n    efc,\n    c(\"e42dep\", \"e16sex\"),\n    collapse = TRUE,\n    weights = efc$weights\n  )))\n  expect_snapshot(print_md(data_tabulate(\n    efc,\n    c(\"e42dep\", \"e16sex\"),\n    weights = efc$weights\n  )))\n  expect_snapshot(display(data_tabulate(\n    efc,\n    c(\"e42dep\", \"e16sex\"),\n    weights = efc$weights\n  )))\n})\n\n\ntest_that(\"data_tabulate data.frame\", {\n  data(efc, package = \"datawizard\")\n  x <- data_tabulate(efc, c(\"e16sex\", \"c172code\"))\n  expect_s3_class(x, \"list\")\n  expect_length(x, 2L)\n  expect_identical(\n    attributes(x[[1]]),\n    list(\n      names = c(\n        \"Variable\",\n        \"Value\",\n        \"N\",\n        \"Raw %\",\n        \"Valid %\",\n        \"Cumulative %\"\n      ),\n      class = c(\"datawizard_table\", \"data.frame\"),\n      row.names = 1:3,\n      type = \"numeric\",\n      varname = \"e16sex\",\n      label = \"elder's gender\",\n      object = \"e16sex\",\n      duplicate_varnames = c(FALSE, TRUE, TRUE),\n      total_n = 100L,\n      valid_n = 100L\n    )\n  )\n  expect_identical(\n    attributes(x[[2]]),\n    list(\n      names = c(\n        \"Variable\",\n      
  \"Value\",\n        \"N\",\n        \"Raw %\",\n        \"Valid %\",\n        \"Cumulative %\"\n      ),\n      class = c(\"datawizard_table\", \"data.frame\"),\n      row.names = 1:4,\n      type = \"numeric\",\n      varname = \"c172code\",\n      label = \"carer's level of education\",\n      object = \"c172code\",\n      duplicate_varnames = c(FALSE, TRUE, TRUE, TRUE),\n      total_n = 100L,\n      valid_n = 90L\n    )\n  )\n  table1 <- x[[1]]\n  expect_identical(\n    as.vector(table1$Value),\n    as.character(c(\n      sort(\n        unique(efc$e16sex)\n      ),\n      NA\n    ))\n  )\n  expect_identical(table1$N, as.vector(table(addNA(efc$e16sex))))\n  expect_identical(\n    table1$`Valid %`,\n    as.vector(c(\n      100 * table(efc$e16sex) / sum(!is.na(efc$e16sex)),\n      NA\n    )),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_tabulate unsupported class\", {\n  data(mtcars)\n  expect_warning(\n    data_tabulate(lm(mpg ~ hp, data = mtcars)),\n    regex = \"Can't compute frequency tables\"\n  )\n})\n\n\ntest_that(\"data_tabulate print\", {\n  set.seed(123)\n  x <- sample.int(3, 1e6, TRUE)\n  out <- data_tabulate(x, name = \"Large Number\")\n  expect_identical(\n    attributes(out),\n    list(\n      names = c(\"Variable\", \"Value\", \"N\", \"Raw %\", \"Valid %\", \"Cumulative %\"),\n      class = c(\"datawizard_table\", \"data.frame\"),\n      row.names = 1:4,\n      type = \"integer\",\n      varname = \"Large Number\",\n      object = \"x\",\n      duplicate_varnames = c(FALSE, TRUE, TRUE, TRUE),\n      total_n = 1000000L,\n      valid_n = 1000000L\n    )\n  )\n})\n\n\ntest_that(\"data_tabulate print\", {\n  data(efc, package = \"datawizard\")\n  expect_snapshot(data_tabulate(efc$e42dep))\n})\n\n\ntest_that(\"data_tabulate print multiple\", {\n  data(efc, package = \"datawizard\")\n  expect_snapshot(data_tabulate(efc, c(\"c172code\", \"e16sex\")))\n})\n\n\ntest_that(\"data_tabulate big numbers\", {\n  set.seed(123)\n  x 
<- sample.int(5, size = 1e7, TRUE)\n  expect_snapshot(data_tabulate(x))\n  expect_snapshot(print(data_tabulate(x), big_mark = \"-\"))\n  expect_snapshot(print(data_tabulate(x), big_mark = \"\"))\n})\n\n\ntest_that(\"data_tabulate print multiple, collapse\", {\n  data(efc, package = \"datawizard\")\n  expect_snapshot(data_tabulate(efc, c(\"c172code\", \"e16sex\"), collapse = TRUE))\n})\n\n\ntest_that(\"data_tabulate grouped data.frame\", {\n  skip_if_not_installed(\"poorman\")\n  data(efc, package = \"datawizard\")\n  x <- data_tabulate(poorman::group_by(efc, e16sex), \"c172code\")\n  expect_s3_class(x, \"list\")\n  expect_length(x, 2L)\n  expect_identical(\n    attributes(x[[1]]),\n    list(\n      names = c(\n        \"Variable\",\n        \"Group\",\n        \"Value\",\n        \"N\",\n        \"Raw %\",\n        \"Valid %\",\n        \"Cumulative %\"\n      ),\n      class = c(\"datawizard_table\", \"data.frame\"),\n      row.names = 1:4,\n      type = \"numeric\",\n      varname = \"c172code\",\n      label = \"carer's level of education\",\n      object = \"c172code\",\n      group_variable = structure(\n        list(e16sex = 1),\n        .drop = TRUE,\n        row.names = 1L,\n        class = \"data.frame\"\n      ),\n      duplicate_varnames = c(FALSE, TRUE, TRUE, TRUE),\n      total_n = 46L,\n      valid_n = 41L\n    )\n  )\n  table1 <- x[[1]]\n  expect_identical(\n    as.vector(table1$Value),\n    as.character(c(\n      sort(\n        unique(efc$c172code)\n      ),\n      NA\n    ))\n  )\n  expect_identical(\n    table1$N,\n    as.vector(table(addNA(efc$c172code[efc$e16sex == 1])))\n  )\n  expect_identical(\n    table1$`Valid %`,\n    as.vector(c(\n      100 *\n        table(efc$c172code[efc$e16sex == 1]) /\n        sum(!is.na(efc$c172code[efc$e16sex == 1])),\n      NA\n    )),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_tabulate print grouped data\", {\n  skip_if_not_installed(\"poorman\")\n  data(efc, package = 
\"datawizard\")\n  expect_snapshot(data_tabulate(poorman::group_by(efc, e16sex), \"c172code\"))\n})\n\ntest_that(\"data_tabulate print, collapse groups\", {\n  skip_if_not_installed(\"poorman\")\n  data(efc, package = \"datawizard\")\n  expect_snapshot(\n    data_tabulate(poorman::group_by(efc, e16sex), \"c172code\", collapse = TRUE)\n  )\n})\n\ntest_that(\"data_tabulate print, collapse groups, drop levels\", {\n  skip_if_not_installed(\"poorman\")\n  data(efc, package = \"datawizard\")\n  expect_snapshot(\n    data_tabulate(\n      poorman::group_by(efc, e16sex),\n      \"e42dep\",\n      collapse = TRUE,\n      drop_levels = TRUE\n    )\n  )\n})\n\ntest_that(\"data_tabulate drop levels\", {\n  x <- factor(rep(letters[1:3], 3), levels = letters[1:5])\n  out1 <- data_tabulate(x, drop_levels = FALSE)\n  out2 <- data_tabulate(x, drop_levels = TRUE)\n  expect_identical(out1$N, c(3L, 3L, 3L, 0L, 0L, 0L))\n  expect_identical(as.character(out1$Value), c(\"a\", \"b\", \"c\", \"d\", \"e\", NA))\n  expect_identical(out2$N, c(3L, 3L, 3L, 0L))\n  expect_identical(as.character(out2$Value), c(\"a\", \"b\", \"c\", NA))\n})\n\n\n# select helpers ------------------------------\n\ntest_that(\"data_tabulate regex\", {\n  data(mtcars)\n  expect_identical(\n    data_tabulate(mtcars, select = \"arb\", regex = TRUE),\n    data_tabulate(mtcars, select = \"carb\")\n  )\n})\n\n\n# missing values ------------------------------\n\ntest_that(\"data_tabulate exclude/include missing values\", {\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n  out <- data_tabulate(efc$c172code)\n  expect_identical(out$N, c(8L, 66L, 16L, 10L))\n  out <- data_tabulate(efc$c172code, remove_na = TRUE)\n  expect_identical(out$N, c(8L, 66L, 16L))\n  out <- data_tabulate(efc$c172code, weights = efc$weights)\n  expect_identical(out$N, c(10, 67, 15, 13))\n  out <- data_tabulate(efc$c172code, remove_na = 
TRUE, weights = efc$weights)\n  expect_identical(out$N, c(10, 67, 15))\n})\n\n\n# cross tables ------------------------------\n\ntest_that(\"data_tabulate, cross tables\", {\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n\n  expect_snapshot(print(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\"\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    remove_na = TRUE\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    weights = efc$weights\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    remove_na = TRUE,\n    weights = efc$weights\n  ))) # nolint\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = efc$e16sex,\n    proportions = \"row\"\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = efc$e16sex,\n    proportions = \"row\",\n    remove_na = TRUE\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = efc$e16sex,\n    proportions = \"row\",\n    weights = efc$weights\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = efc$e16sex,\n    proportions = \"row\",\n    remove_na = TRUE,\n    weights = efc$weights\n  ))) # nolint\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\"\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\",\n    remove_na = TRUE\n  )))\n  expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\",\n    weights = \"weights\"\n  )))\n  
expect_snapshot(print(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\",\n    remove_na = TRUE,\n    weights = \"weights\"\n  ))) # nolint\n  expect_snapshot(print(data_tabulate(\n    efc,\n    c(\"c172code\", \"e42dep\"),\n    by = \"e16sex\",\n    proportions = \"row\"\n  ))) # nolint\n})\n\ntest_that(\"data_tabulate, cross tables, HTML\", {\n  skip_if_not_installed(\"gt\")\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\"\n    )),\n    \"gt_tbl\"\n  )\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      remove_na = TRUE\n    )),\n    \"gt_tbl\"\n  ) # nolint\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      weights = efc$weights\n    )),\n    \"gt_tbl\"\n  ) # nolint\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      remove_na = TRUE,\n      weights = efc$weights\n    )),\n    \"gt_tbl\"\n  ) # nolint\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc,\n      \"c172code\",\n      by = efc$e16sex,\n      proportions = \"row\"\n    )),\n    \"gt_tbl\"\n  )\n  expect_s3_class(\n    print_html(data_tabulate(\n      efc,\n      \"c172code\",\n      by = efc$e16sex,\n      proportions = \"row\",\n      remove_na = TRUE,\n      weights = efc$weights\n    )),\n    \"gt_tbl\"\n  ) # nolint\n  expect_s3_class(\n    display(\n      data_tabulate(\n        efc,\n        \"c172code\",\n        by = efc$e16sex,\n        proportions = \"row\",\n        remove_na = TRUE,\n        weights = efc$weights\n      ),\n      format = \"html\"\n   
 ),\n    \"gt_tbl\"\n  ) # nolint\n})\n\ntest_that(\"data_tabulate, cross tables, tinytable\", {\n  skip_if_not_installed(\"tinytable\")\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n\n  expect_snapshot(display(\n    data_tabulate(efc$c172code, by = efc$e16sex, proportions = \"full\"),\n    format = \"tt\"\n  ))\n  expect_snapshot(display(\n    data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      remove_na = TRUE\n    ),\n    format = \"tt\"\n  ))\n  expect_snapshot(display(\n    data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      weights = efc$weights\n    ),\n    format = \"tt\"\n  ))\n  expect_snapshot(display(\n    data_tabulate(\n      efc$c172code,\n      by = efc$e16sex,\n      proportions = \"full\",\n      remove_na = TRUE,\n      weights = efc$weights\n    ),\n    format = \"tt\"\n  ))\n  expect_snapshot(display(\n    data_tabulate(efc, \"c172code\", by = efc$e16sex, proportions = \"row\"),\n    format = \"tt\"\n  ))\n  expect_snapshot(display(\n    data_tabulate(\n      efc,\n      \"c172code\",\n      by = efc$e16sex,\n      proportions = \"row\",\n      remove_na = TRUE,\n      weights = efc$weights\n    ),\n    format = \"tt\"\n  ))\n})\n\ntest_that(\"data_tabulate, cross tables, grouped df\", {\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n  grp <- data_group(efc, \"e42dep\")\n  expect_snapshot(print(data_tabulate(\n    grp,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"row\"\n  )))\n  skip_if_not_installed(\"gt\")\n  expect_s3_class(\n    print_html(data_tabulate(\n      grp,\n      \"c172code\",\n      by = \"e16sex\",\n      proportions = \"row\"\n    )),\n    \"gt_tbl\"\n  ) # nolint\n  
expect_s3_class(\n    print_html(data_tabulate(\n      efc,\n      c(\"e16sex\", \"e42dep\"),\n      by = \"c172code\",\n      proportions = \"row\"\n    )),\n    \"gt_tbl\"\n  ) # nolint\n})\n\ntest_that(\"data_tabulate, cross tables, print/format works\", {\n  data(mtcars)\n  x <- data_tabulate(mtcars, c(\"cyl\", \"am\"), by = \"gear\")\n  expect_snapshot(print(x))\n})\n\ntest_that(\"data_tabulate, cross tables, errors by\", {\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n  expect_error(\n    data_tabulate(efc$c172code, by = \"e16sex\"),\n    regex = \"If `by` is a string\"\n  )\n  expect_error(\n    data_tabulate(efc$c172code, by = efc$e16sex[-1]),\n    regex = \"Length of `by`\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", by = efc$e16sex[-1]),\n    regex = \"Length of `by`\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", by = \"c16sex\"),\n    regex = \"not found\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", by = c(\"e16sex\", \"e42dep\")),\n    regex = \"You may use\"\n  )\n})\n\ntest_that(\"data_tabulate, cross tables, errors weights\", {\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n  expect_error(\n    data_tabulate(efc$c172code, weights = \"weights\"),\n    regex = \"If `weights`\"\n  )\n  expect_error(\n    data_tabulate(efc$c172code, weights = efc$weights[-1]),\n    regex = \"Length of `weights`\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", weights = efc$weights[-1]),\n    regex = \"Length of `weights`\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", weights = \"weigths\"),\n    regex = \"not found\"\n  )\n  expect_error(\n    data_tabulate(efc, \"c172code\", weights = c(\"e16sex\", \"e42dep\")),\n    regex = \"length 1\"\n  )\n  
expect_error(\n    data_tabulate(efc$c172code, weights = efc$wweight),\n    regex = \"not found\"\n  )\n})\n\ntest_that(\"data_tabulate, cross tables, modify structure\", {\n  skip_if_not_installed(\"knitr\")\n  data(efc, package = \"datawizard\")\n  x <- data_group(efc, c(\"c172code\", \"e16sex\"))\n  out <- data_tabulate(x, \"c172code\")\n  out[] <- lapply(\n    out,\n    data_select,\n    exclude = c(\"Variable\", \"Raw %\", \"Cumulative %\")\n  )\n  junk <- capture.output(print_md(out))\n  expect_false(grepl(\"Variable\", junk[3], fixed = TRUE))\n  expect_false(grepl(\"Raw %\", junk[3], fixed = TRUE))\n  # display() default to markdown\n  junk <- capture.output(display(out))\n  expect_false(grepl(\"Variable\", junk[3], fixed = TRUE))\n  expect_false(grepl(\"Raw %\", junk[3], fixed = TRUE))\n})\n\n\n# markdown -------------------------\n\ntest_that(\"data_tabulate, cross tables, markdown\", {\n  skip_if_not_installed(\"knitr\")\n  data(efc, package = \"datawizard\")\n  set.seed(123)\n  efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  efc$e16sex[sample.int(nrow(efc), 5)] <- NA\n\n  expect_snapshot(print_md(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\"\n  )))\n  expect_snapshot(print_md(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    remove_na = TRUE\n  )))\n  expect_snapshot(print_md(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    weights = efc$weights\n  )))\n  expect_snapshot(print_md(data_tabulate(\n    efc$c172code,\n    by = efc$e16sex,\n    proportions = \"full\",\n    remove_na = TRUE,\n    weights = efc$weights\n  ))) # nolint\n  expect_snapshot(print_md(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\",\n    remove_na = TRUE,\n    weights = \"weights\"\n  ))) # nolint\n  expect_snapshot(print_md(data_tabulate(\n    efc,\n    c(\"c172code\", \"e42dep\"),\n    by = 
\"e16sex\",\n    proportions = \"row\"\n  ))) # nolint\n  expect_snapshot(display(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"column\",\n    remove_na = TRUE,\n    weights = \"weights\"\n  ))) # nolint\n  expect_snapshot(display(data_tabulate(\n    efc,\n    c(\"c172code\", \"e42dep\"),\n    by = \"e16sex\",\n    proportions = \"row\"\n  ))) # nolint\n})\n\n\n# validate against table -------------------------\n\ntest_that(\"data_tabulate, validate against table\", {\n  data(mtcars)\n  # frequency table\n  out1 <- as.data.frame(table(mtcars$cyl))\n  out2 <- data_tabulate(mtcars$cyl, remove_na = TRUE)\n  expect_identical(out1$Freq, out2$N)\n  # crosstable\n  out1 <- data_arrange(\n    as.data.frame(table(mtcars$cyl, mtcars$gear)),\n    c(\"Var1\", \"Var2\")\n  )\n  out2 <- data_rename(\n    data_to_long(\n      as.data.frame(data_tabulate(\n        mtcars$cyl,\n        by = mtcars$gear,\n        remove_na = TRUE\n      )),\n      2:4,\n      names_to = \"Var2\",\n      values_to = \"Freq\"\n    ),\n    \"mtcars$cyl\",\n    \"Var1\"\n  )\n  out1[[2]] <- as.character(out1[[2]])\n  expect_equal(out1, out2, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_tabulate, correct 0% for proportions\", {\n  data(efc, package = \"datawizard\")\n  out <- data_tabulate(efc, \"c172code\", by = \"e16sex\", proportions = \"column\")\n  expect_identical(\n    format(out[[1]])[[4]],\n    c(\"0 (0.0%)\", \"0 (0.0%)\", \"0 (0.0%)\", \"0 (0.0%)\", \"\", \"0\")\n  )\n  expect_snapshot(print(out[[1]]))\n})\n\n\n# coercing to data frame -------------------------\n\ntest_that(\"data_tabulate, as.data.frame, frequency tables\", {\n  data(mtcars)\n  # frequency table\n  x <- data_tabulate(mtcars$cyl)\n  out <- as.data.frame(x)\n  expect_named(\n    out,\n    c(\"Variable\", \"Value\", \"N\", \"Raw %\", \"Valid %\", \"Cumulative %\")\n  )\n  expect_identical(\n    out$Variable,\n    c(\"mtcars$cyl\", \"mtcars$cyl\", \"mtcars$cyl\", \"mtcars$cyl\")\n  )\n  
expect_false(any(vapply(out[2:ncol(out)], is.character, logical(1))))\n  # frequency tables\n  x <- data_tabulate(mtcars, select = c(\"cyl\", \"am\"))\n  out <- as.data.frame(x)\n  expect_named(out, c(\"var\", \"table\"))\n  expect_equal(\n    vapply(out, class, character(1)),\n    c(\"character\", \"AsIs\"),\n    ignore_attr = TRUE\n  )\n  expect_length(out$table, 2L)\n  expect_named(\n    out$table[[1]],\n    c(\"Variable\", \"Value\", \"N\", \"Raw %\", \"Valid %\", \"Cumulative %\")\n  )\n  expect_identical(out$table[[1]]$Variable, c(\"cyl\", \"cyl\", \"cyl\", \"cyl\"))\n  expect_false(any(vapply(\n    out$table[[1]][2:ncol(out$table[[1]])],\n    is.character,\n    logical(1)\n  )))\n})\n\n\ntest_that(\"data_tabulate, as.data.frame, cross tables\", {\n  data(mtcars)\n  # cross table\n  x <- data_tabulate(mtcars, \"cyl\", by = \"am\")\n  out <- as.data.frame(x)\n  expect_named(out, c(\"var\", \"table\"))\n  expect_equal(\n    vapply(out, class, character(1)),\n    c(\"character\", \"AsIs\"),\n    ignore_attr = TRUE\n  )\n  expect_length(out$table, 1L)\n  expect_named(out$table[[1]], c(\"cyl\", \"0\", \"1\", \"NA\"))\n  expect_identical(nrow(out$table[[1]]), 4L)\n  # cross tables\n  x <- data_tabulate(mtcars, c(\"cyl\", \"vs\"), by = \"am\")\n  out <- as.data.frame(x)\n  expect_named(out, c(\"var\", \"table\"))\n  expect_equal(\n    vapply(out, class, character(1)),\n    c(\"character\", \"AsIs\"),\n    ignore_attr = TRUE\n  )\n  expect_length(out$table, 2L)\n  expect_named(out$table[[1]], c(\"cyl\", \"0\", \"1\", \"NA\"))\n  expect_identical(nrow(out$table[[1]]), 4L)\n})\n\n\ntest_that(\"data_tabulate, as.data.frame, cross tables with total N\", {\n  # cross table, with total\n  x <- data_tabulate(mtcars, \"cyl\", by = \"am\")\n  out <- as.data.frame(x, add_total = TRUE)\n  expect_named(out, c(\"var\", \"table\"))\n  expect_equal(\n    vapply(out, class, character(1)),\n    c(\"character\", \"AsIs\"),\n    ignore_attr = TRUE\n  )\n  expect_length(out$table, 1L)\n 
 expect_named(out$table[[1]], c(\"cyl\", \"0\", \"1\", \"<NA>\", \"Total\"))\n  expect_identical(nrow(out$table[[1]]), 5L)\n  expect_identical(out$table[[1]]$cyl, c(\"4\", \"6\", \"8\", NA, \"Total\"))\n  # cross tables, with total\n  x <- data_tabulate(mtcars, c(\"cyl\", \"vs\"), by = \"am\")\n  out <- as.data.frame(x, add_total = TRUE)\n  expect_named(out, c(\"var\", \"table\"))\n  expect_equal(\n    vapply(out, class, character(1)),\n    c(\"character\", \"AsIs\"),\n    ignore_attr = TRUE\n  )\n  expect_length(out$table, 2L)\n  expect_named(out$table[[1]], c(\"cyl\", \"0\", \"1\", \"<NA>\", \"Total\"))\n  expect_identical(nrow(out$table[[1]]), 5L)\n  expect_identical(out$table[[1]]$cyl, c(\"4\", \"6\", \"8\", NA, \"Total\"))\n})\n\n\n# table methods -----------------------------\n\ntest_that(\"data_tabulate, table methods\", {\n  data(mtcars)\n\n  # datawizard_table\n  x <- data_tabulate(mtcars$cyl)\n  expect_type(as.table(x), \"list\")\n  expect_s3_class(as.table(x, simplify = TRUE), \"table\")\n  expect_snapshot(as.table(x))\n\n  # datawizard_tables\n  x <- data_tabulate(mtcars, \"cyl\")\n  expect_type(as.table(x), \"list\")\n  expect_s3_class(as.table(x, simplify = TRUE), \"table\")\n  expect_snapshot(as.table(x))\n\n  # test remove_na\n  x <- data_tabulate(mtcars, \"cyl\", remove_na = TRUE)\n  expect_identical(x[[1]]$N, as.vector(as.table(x, simplify = TRUE)))\n  x <- data_tabulate(mtcars, \"cyl\")\n  expect_identical(\n    x[[1]]$N,\n    as.vector(as.table(x, simplify = TRUE, remove_na = FALSE))\n  )\n  expect_snapshot(as.table(x, remove_na = FALSE))\n\n  # datawizard_tables, multiple\n  x <- data_tabulate(mtcars, c(\"cyl\", \"gear\"))\n  expect_identical(unlist(lapply(as.table(x), class)), rep(\"table\", 2L))\n  expect_type(as.table(x, simplify = TRUE), \"list\") # no simplification\n  expect_type(as.table(x, simplify = FALSE), \"list\")\n  expect_snapshot(as.table(x))\n\n  # datawizard_crosstab\n  x <- data_tabulate(mtcars$cyl, mtcars$gear)\n  
expect_type(as.table(x), \"list\")\n  expect_s3_class(as.table(x, simplify = TRUE), \"table\")\n  expect_snapshot(as.table(x))\n  expect_snapshot(as.table(x, simplify = TRUE))\n\n  # datawizard_crosstabs\n  x <- data_tabulate(mtcars, \"cyl\", by = \"gear\")\n  expect_type(as.table(x), \"list\")\n  expect_s3_class(as.table(x, simplify = TRUE), \"table\")\n  expect_snapshot(as.table(x))\n  expect_snapshot(as.table(x, simplify = TRUE))\n\n  # datawizard_crosstabs, multiple\n  x <- data_tabulate(mtcars, c(\"am\", \"cyl\"), by = \"gear\")\n  expect_identical(unlist(lapply(as.table(x), class)), rep(\"table\", 2L))\n  expect_identical(\n    x[[1]]$`3`[1:2],\n    as.vector(as.table(x)[[1]][, 1, drop = TRUE])\n  )\n  expect_identical(\n    x[[2]]$`4`[1:3],\n    as.vector(as.table(x)[[2]][, 2, drop = TRUE])\n  )\n  expect_type(as.table(x), \"list\")\n  expect_type(as.table(x, simplify = TRUE), \"list\") # no simplification\n  expect_snapshot(as.table(x))\n\n  # grouped data frames\n  d <- data_group(mtcars, \"am\")\n  x <- data_tabulate(d, \"cyl\", by = \"gear\")\n  expect_named(as.table(x), c(\"am (0)\", \"am (1)\"))\n  expect_snapshot(as.table(x))\n\n  # messages - no missings to remove\n  expect_silent(as.table(data_tabulate(mtcars, \"cyl\")))\n  expect_silent(as.table(data_tabulate(mtcars, \"cyl\"), verbose = FALSE))\n})\n\n\ntest_that(\"data_tabulate, table methods, only warn if necessary\", {\n  # missings\n  data(efc)\n\n  # single variable\n  expect_message(as.table(data_tabulate(efc$c172code)))\n  expect_silent(as.table(data_tabulate(efc$c172code, remove_na = TRUE)))\n  expect_silent(as.table(data_tabulate(efc$c172code), remove_na = FALSE))\n  expect_silent(as.table(data_tabulate(efc$c172code), verbose = FALSE))\n\n  # cross table\n  expect_message(\n    as.table(data_tabulate(efc, \"c172code\", by = \"e42dep\")),\n    regex = \"Removing NA values\"\n  )\n  expect_silent(as.table(data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e42dep\",\n    remove_na = 
TRUE\n  )))\n  expect_silent(as.table(\n    data_tabulate(efc, \"c172code\", by = \"e42dep\"),\n    remove_na = FALSE\n  ))\n  expect_silent(as.table(\n    data_tabulate(efc, \"c172code\", by = \"e42dep\"),\n    verbose = FALSE\n  ))\n\n  # no missings\n  data(mtcars)\n\n  # single variable\n  expect_silent(as.table(data_tabulate(mtcars$gear)))\n  expect_silent(as.table(data_tabulate(mtcars$gear, remove_na = TRUE)))\n  expect_silent(as.table(data_tabulate(mtcars$gear), verbose = FALSE))\n\n  # cross table\n  expect_silent(as.table(data_tabulate(mtcars, \"gear\", by = \"cyl\")))\n  expect_silent(as.table(data_tabulate(\n    mtcars,\n    \"gear\",\n    by = \"cyl\",\n    remove_na = TRUE\n  )))\n  expect_silent(as.table(\n    data_tabulate(mtcars, \"gear\", by = \"cyl\"),\n    verbose = FALSE\n  ))\n\n  # group DF throws no warning\n  d <- data_group(mtcars, \"am\")\n  expect_silent(as.table(data_tabulate(d, \"cyl\", by = \"gear\")))\n})\n\n\ntest_that(\"data_tabulate, cross tables, extract proportions\", {\n  data(efc, package = \"datawizard\")\n  out <- data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"row\",\n    remove_na = TRUE\n  )\n  tab <- table(efc$c172code, efc$e16sex) /\n    rowSums(table(efc$c172code, efc$e16sex))\n  dimnames(tab) <- list(c(\"1\", \"2\", \"3\"), c(\"male\", \"female\"))\n  expect_equal(\n    as.prop.table(out, verbose = FALSE),\n    list(tab),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    as.prop.table(out, verbose = FALSE, simplify = TRUE),\n    tab,\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n  out <- data_tabulate(\n    efc,\n    \"c172code\",\n    by = \"e16sex\",\n    proportions = \"col\",\n    remove_na = TRUE\n  )\n  tab <- as.table(t(\n    t(table(efc$c172code, efc$e16sex)) /\n      colSums(table(efc$c172code, efc$e16sex))\n  ))\n  dimnames(tab) <- list(c(\"1\", \"2\", \"3\"), c(\"male\", \"female\"))\n  expect_equal(\n    as.prop.table(out, verbose = 
FALSE),\n    list(tab),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n  expect_equal(\n    as.prop.table(out, verbose = FALSE, simplify = TRUE),\n    tab,\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_to_factor.R",
    "content": "# numeric\ntest_that(\"to_factor\", {\n  x <- c(10, 11, 12)\n  expect_identical(\n    to_factor(x),\n    structure(1:3, .Label = c(\"10\", \"11\", \"12\"), class = \"factor\")\n  )\n\n  data(efc)\n  x <- to_factor(efc$c172code)\n  expect_identical(\n    levels(x),\n    c(\n      \"low level of education\",\n      \"intermediate level of education\",\n      \"high level of education\"\n    )\n  )\n  x <- to_factor(efc$c172code, labels_to_levels = FALSE)\n  expect_identical(levels(x), c(\"1\", \"2\", \"3\"))\n})\n\n# numeric, partially labelled\ntest_that(\"to_factor\", {\n  x <- c(10, 11, 12)\n  attr(x, \"labels\") <- c(ten = 10, twelve = 12)\n  expect_message(\n    expect_identical(\n      to_factor(x),\n      structure(1:3, levels = c(\"ten\", \"11\", \"twelve\"), class = \"factor\")\n    ),\n    regexp = \"Not all factor levels\"\n  )\n})\n\n# factor\ntest_that(\"to_factor\", {\n  data(efc)\n  expect_identical(to_factor(efc$e42dep), efc$e42dep)\n})\n\n# data frame\ntest_that(\"to_factor\", {\n  data(iris)\n  out <- to_factor(iris)\n  expect_identical(out$Species, iris$Species)\n  expect_true(all(vapply(out, is.factor, TRUE)))\n  expect_identical(\n    levels(out$Sepal.Length),\n    c(\n      \"4.3\",\n      \"4.4\",\n      \"4.5\",\n      \"4.6\",\n      \"4.7\",\n      \"4.8\",\n      \"4.9\",\n      \"5\",\n      \"5.1\",\n      \"5.2\",\n      \"5.3\",\n      \"5.4\",\n      \"5.5\",\n      \"5.6\",\n      \"5.7\",\n      \"5.8\",\n      \"5.9\",\n      \"6\",\n      \"6.1\",\n      \"6.2\",\n      \"6.3\",\n      \"6.4\",\n      \"6.5\",\n      \"6.6\",\n      \"6.7\",\n      \"6.8\",\n      \"6.9\",\n      \"7\",\n      \"7.1\",\n      \"7.2\",\n      \"7.3\",\n      \"7.4\",\n      \"7.6\",\n      \"7.7\",\n      \"7.9\"\n    )\n  )\n\n  out <- to_factor(iris, select = starts_with(\"Sep\"), append = TRUE)\n  expect_identical(\n    colnames(out),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      
\"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_f\",\n      \"Sepal.Width_f\"\n    )\n  )\n  expect_identical(sum(vapply(out, is.factor, TRUE)), 3L)\n})\n\n\n# select helpers ------------------------------\ntest_that(\"to_factor regex\", {\n  expect_identical(\n    to_factor(mtcars, select = \"yl\", regex = TRUE),\n    to_factor(mtcars, select = \"cyl\")\n  )\n  expect_identical(\n    to_factor(mtcars, select = \"yl$\", regex = TRUE),\n    to_factor(mtcars, select = \"cyl\")\n  )\n})\n\n\n# SPSS file, many value labels  -----------------------------------\n\nskip_if_not_installed(\"httr\")\nskip_if_not_installed(\"haven\")\n\nskip_on_cran()\n\nskip_if_not_installed(\"curl\")\nskip_if_offline()\n\n# Output validated against SPSS output from original dataset\n\ntest_that(\"data_read, convert many labels correctly\", {\n  temp_file <- tempfile(fileext = \".sav\")\n  request <- httr::GET(\n    \"https://raw.github.com/easystats/circus/master/data/spss_many_labels.sav\"\n  )\n  httr::stop_for_status(request)\n  writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n  d <- data_read(\n    temp_file,\n    convert_factors = FALSE,\n    verbose = FALSE\n  )\n  expect_identical(\n    levels(to_factor(d$selv1)),\n    c(\n      \"Vignette 1 weiblich (Gülsen E. Reinigungskraft B)\",\n      \"Vignette 2 weiblich (Gülsen E. Anwältin B)\",\n      \"Vignette 3 weiblich (Monika E. Reinigungskraft B)\",\n      \"Vignette 4 weiblich (Monika E. Anwältin B)\",\n      \"Vignette 5 männlich (Hasan E. Reinigungskraft B)\",\n      \"Vignette 6 männlich (Hasan E. Anwalt B)\",\n      \"Vignette 7 männlich (Martin E. Reinigungskraft B)\",\n      \"Vignette 8 männlich (Martin E. Anwalt B)\",\n      \"Vignette 9 weiblich (Gülsen E. Reinigungskraft E)\",\n      \"Vignette 10 weiblich (Gülsen E. Anwältin E)\",\n      \"Vignette 11 weiblich (Monika E. Reinigungskraft E)\",\n      \"Vignette 12 weiblich (Monika E. Anwältin E)\",\n      \"Vignette 13 männlich (Hasan E. 
Reinigungskraft E)\",\n      \"Vignette 14 männlich (Hasan E. Anwalt E)\",\n      \"Vignette 15 männlich (Martin E. Reinigungskraft E)\",\n      \"Vignette 16 männlich (Martin E. Anwalt E)\"\n    )\n  )\n  expect_snapshot(data_tabulate(to_factor(d$selv1)))\n\n  expect_identical(levels(to_factor(d$c12)), c(\"ja\", \"nein\", \"keine Angabe\"))\n  expect_snapshot(data_tabulate(to_factor(d$c12)))\n\n  expect_identical(\n    levels(to_factor(d$c12a)),\n    c(\"Filter\", \"ja\", \"nein\", \"keine Angabe\")\n  )\n  expect_snapshot(data_tabulate(to_factor(d$c12a)))\n  expect_identical(\n    levels(to_factor(d$c12c)),\n    c(\n      \"Filter\",\n      \"0 = keine\",\n      \"1\",\n      \"2\",\n      \"3\",\n      \"4\",\n      \"5\",\n      \"6\",\n      \"7\",\n      \"8\",\n      \"9\",\n      \"10 = sehr starke\",\n      \"weiß nicht / keine Angabe\"\n    )\n  )\n  expect_snapshot(data_tabulate(to_factor(d$c12c)))\n  unlink(temp_file)\n})\n\n\ntest_that(\"to_factor works with haven_labelled, convert many labels correctly\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"temp_file\", fileext = \".sav\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/EFC.sav\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- haven::read_spss(temp_file)\n    x <- to_factor(d$c172code)\n    expect_identical(\n      levels(x),\n      c(\n        \"low level of education\",\n        \"intermediate level of education\",\n        \"high level of education\"\n      )\n    )\n  })\n})\n"
  },
  {
    "path": "tests/testthat/test-data_to_long.R",
    "content": "set.seed(123)\nwide_data <- data.frame(replicate(3, sample.int(5)))\n\ntest_that(\"data_to_long works\", {\n  expect_equal(\n    head(data_to_long(wide_data)),\n    data.frame(\n      Name = c(\"X1\", \"X2\", \"X3\", \"X1\", \"X2\", \"X3\"),\n      Value = c(3L, 3L, 2L, 2L, 1L, 3L),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n\n  expect_equal(\n    head(data_to_long(\n      wide_data,\n      select = c(1, 2),\n      names_to = \"Column\",\n      values_to = \"Numbers\",\n      rows_to = \"Row\"\n    )),\n    data.frame(\n      X3 = c(2L, 2L, 3L, 3L, 1L, 1L),\n      Row = c(1, 1, 2, 2, 3, 3),\n      Column = c(\"X1\", \"X2\", \"X1\", \"X2\", \"X1\", \"X2\"),\n      Numbers = c(3L, 3L, 2L, 1L, 5L, 2L),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_to_long works - using row names as idvar\", {\n  data(mtcars)\n  out <- data_to_long(mtcars, select = 2:4)\n  expect_equal(\n    dim(out),\n    c(96, 10),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    colnames(out),\n    c(\"mpg\", \"drat\", \"wt\", \"qsec\", \"vs\", \"am\", \"gear\", \"carb\", \"name\", \"value\"),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    head(out$value),\n    c(8, 304, 150, 8, 472, 205),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n})\n\n\ntest_that(\"data_to_long works - complex dataset\", {\n  skip_if_not_installed(\"psych\")\n  data <- psych::bfi\n\n  long <- data_to_long(\n    data,\n    select = regex(\"\\\\d\"),\n    names_to = \"Item\",\n    values_to = \"Score\",\n    rows_to = \"Participant\"\n  )\n\n  expect_snapshot(str(long))\n\n  long$Facet <- gsub(\"\\\\d\", \"\", long$Item)\n  long$Item <- gsub(\"[A-Z]\", \"\", long$Item)\n  long$Item <- paste0(\"I\", long$Item)\n\n  long1 <- data_to_long(\n    data,\n    select = starts_with(\"A\"),\n    names_to = \"Item\",\n    values_to = \"Score\",\n 
   rows_to = \"Participant\"\n  )\n\n  expect_identical(unique(long1$Item), c(\"A1\", \"A2\", \"A3\", \"A4\", \"A5\"))\n  expect_identical(unique(long1$Score), c(2L, 4L, 3L, 5L, 6L, 1L, NA))\n  expect_identical(ncol(long1), 26L)\n  expect_identical(nrow(long1), 14000L)\n\n  long1 <- data_to_long(\n    data,\n    select = starts_with(\"a\"),\n    names_to = \"Item\",\n    values_to = \"Score\",\n    rows_to = \"Participant\"\n  )\n\n  expect_identical(ncol(long1), 30L)\n  expect_identical(nrow(long1), nrow(data))\n\n  long1 <- data_to_long(\n    data,\n    select = starts_with(\"a\"),\n    names_to = \"Item\",\n    values_to = \"Score\",\n    rows_to = \"Participant\",\n    ignore_case = TRUE\n  )\n\n  expect_identical(unique(long1$Item), c(\"A1\", \"A2\", \"A3\", \"A4\", \"A5\", \"age\"))\n  expect_identical(ncol(long1), 25L)\n  expect_identical(nrow(long1), 16800L)\n\n  long1 <- data_to_long(\n    data,\n    select = c(1:5, 28),\n    names_to = \"Item\",\n    values_to = \"Score\",\n    rows_to = \"Participant\",\n    ignore_case = TRUE\n  )\n\n  expect_identical(unique(long1$Item), c(\"A1\", \"A2\", \"A3\", \"A4\", \"A5\", \"age\"))\n  expect_identical(ncol(long1), 25L)\n  expect_identical(nrow(long1), 16800L)\n})\n\n\ntest_that(\"data_to_long: arg 'cols' overrides 'select'\", {\n  skip_if_not_installed(\"psych\")\n  data <- psych::bfi\n\n  expect_identical(\n    data_to_long(\n      wide_data,\n      select = c(1, 2),\n      names_to = \"Column\",\n      values_to = \"Numbers\",\n      rows_to = \"Row\"\n    ),\n    data_to_long(\n      wide_data,\n      cols = c(1, 2),\n      names_to = \"Column\",\n      values_to = \"Numbers\",\n      rows_to = \"Row\"\n    )\n  )\n\n  expect_identical(\n    data_to_long(\n      data,\n      cols = regex(\"\\\\d\"),\n      names_to = \"Item\",\n      values_to = \"Score\",\n      rows_to = \"Participant\"\n    ),\n    data_to_long(\n      data,\n      select = regex(\"\\\\d\"),\n      names_to = \"Item\",\n      values_to = 
\"Score\",\n      rows_to = \"Participant\"\n    )\n  )\n\n  expect_identical(\n    data_to_long(\n      data,\n      cols = starts_with(\"A\"),\n      names_to = \"Item\",\n      values_to = \"Score\",\n      rows_to = \"Participant\"\n    ),\n    data_to_long(\n      data,\n      select = starts_with(\"A\"),\n      names_to = \"Item\",\n      values_to = \"Score\",\n      rows_to = \"Participant\"\n    )\n  )\n})\n\n\nd <- data.frame(\n  age = c(20, 30, 40),\n  sex = c(\"Female\", \"Male\", \"Male\"),\n  score_t1 = c(30, 35, 32),\n  score_t2 = c(33, 34, 37),\n  speed_t1 = c(2, 3, 1),\n  speed_t2 = c(3, 4, 5),\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"data_to_long works as expected - simple dataset\", {\n  out <- data_to_long(d, starts_with(\"score\"))\n  expect_identical(\n    out$name,\n    c(\"score_t1\", \"score_t2\", \"score_t1\", \"score_t2\", \"score_t1\", \"score_t2\")\n  )\n  expect_identical(\n    out$value,\n    c(d$score_t1, d$score_t2)[c(1, 4, 2, 5, 3, 6)]\n  )\n\n  out <- data_to_long(\n    d,\n    contains(\"t2\"),\n    names_to = \"NewCol\",\n    values_to = \"Time\"\n  )\n  expect_identical(\n    out$NewCol,\n    c(\"score_t2\", \"speed_t2\", \"score_t2\", \"speed_t2\", \"score_t2\", \"speed_t2\")\n  )\n  expect_identical(out$Time, c(33, 3, 34, 4, 37, 5))\n})\n\n\ntest_that(\"data_to_long works as expected - select-helper inside functions, using regex\", {\n  test_fun <- function(data, i) {\n    data_to_long(data, select = i, regex = TRUE)\n  }\n  out <- test_fun(d, \"^score\")\n  expect_identical(\n    out$name,\n    c(\"score_t1\", \"score_t2\", \"score_t1\", \"score_t2\", \"score_t1\", \"score_t2\")\n  )\n  expect_identical(\n    out$value,\n    c(d$score_t1, d$score_t2)[c(1, 4, 2, 5, 3, 6)]\n  )\n})\n\n\ntest_that(\"data_to_long: need to provide sep or pattern if several names_to\", {\n  expect_error(\n    data_to_long(wide_data, names_to = c(\"foo\", \"foo2\")),\n    \"you supply multiple names\"\n  )\n})\n\ntest_that(\"data_to_long: 
can't use sep or pattern if only one names_to\", {\n  expect_error(\n    data_to_long(wide_data, names_to = \"foo\", names_sep = \"_\"),\n    \"can't use `names_sep`\"\n  )\n\n  expect_error(\n    data_to_long(wide_data, names_to = \"foo\", names_pattern = \"_\"),\n    \"can't use `names_pattern`\"\n  )\n})\n\ntest_that(\"data_to_long: error if no columns to reshape\", {\n  # since #602, we no longer have the case that .select_nse() returns no\n  # columns, because we error before when no column found, instead of returning\n  # NULL or a vector of lenght zero.\n  expect_error(\n    data_to_long(wide_data, cols = \"foo\"),\n    \"Possibly misspelled\"\n  )\n})\n\n\n# EQUIVALENCE WITH TIDYR - PIVOT_LONGER -------------------------------------------\n\n# Examples coming from: https://tidyr.tidyverse.org/articles/pivot.html#longer\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 1\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::relig_income %>% # nolint\n    tidyr::pivot_longer(!religion, names_to = \"income\", values_to = \"count\")\n\n  y <- tidyr::relig_income %>% # nolint\n    data_to_long(cols = -religion, names_to = \"income\", values_to = \"count\")\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 2\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::billboard %>%\n    tidyr::pivot_longer(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\"\n    )\n\n  y <- tidyr::billboard %>%\n    data_to_long(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\"\n    )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 3\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::billboard %>%\n    tidyr::pivot_longer(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\",\n      values_drop_na = TRUE\n    )\n\n  y <- 
tidyr::billboard %>%\n    data_to_long(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      values_to = \"rank\",\n      values_drop_na = TRUE\n    )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 4\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::billboard %>%\n    tidyr::pivot_longer(\n      cols = starts_with(\"wk\"),\n      names_to = \"week\",\n      names_prefix = \"wk\",\n      values_to = \"rank\",\n      values_drop_na = TRUE\n    )\n\n  y <- tidyr::billboard %>%\n    data_to_long(\n      select = starts_with(\"wk\"),\n      names_to = \"week\",\n      names_prefix = \"wk\",\n      values_to = \"rank\",\n      values_drop_na = TRUE\n    )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 5\", {\n  skip_if_not_installed(\"tidyr\")\n\n  suppressWarnings({\n    x <- tidyr::who %>%\n      tidyr::pivot_longer(\n        cols = 5:60,\n        names_to = c(\"diagnosis\", \"gender\", \"age\"),\n        names_sep = \"_\",\n        values_to = \"count\"\n      )\n  })\n\n  y <- tidyr::who %>%\n    data_to_long(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_sep = \"_\",\n      values_to = \"count\"\n    )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\ntest_that(\"data_to_long equivalent to pivot_longer: ex 6\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::who %>%\n    tidyr::pivot_longer(\n      cols = new_sp_m014:newrel_f65,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_pattern = \"new_?(.*)_(.)(.*)\",\n      values_to = \"count\"\n    )\n\n  y <- tidyr::who %>%\n    data_to_long(\n      cols = 5:60,\n      names_to = c(\"diagnosis\", \"gender\", \"age\"),\n      names_pattern = \"new_?(.*)_(.)(.*)\",\n      values_to = \"count\"\n    )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\n\n# tests coming from tidyr's repo\n# 
https://github.com/tidyverse/tidyr/blob/main/tests/testthat/test-pivot-long.R\n\ntest_that(\"can reshape all cols to long\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(x = 1:2, y = 3:4)\n  pv <- data_to_long(df, x:y)\n\n  expect_named(pv, c(\"name\", \"value\"))\n  expect_identical(pv$name, rep(names(df), 2))\n  expect_identical(pv$value, c(1L, 3L, 2L, 4L))\n})\n\ntest_that(\"values interleaved correctly\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(\n    x = c(1, 2),\n    y = c(10, 20),\n    z = c(100, 200)\n  )\n  pv <- data_to_long(df, 1:3)\n\n  expect_identical(pv$value, c(1, 10, 100, 2, 20, 200))\n})\n\ntest_that(\"preserves original keys\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(x = 1:2, y = 2, z = 1:2)\n  pv <- data_to_long(df, y:z)\n\n  expect_named(pv, c(\"x\", \"name\", \"value\"))\n  expect_identical(pv$x, rep(df$x, each = 2))\n})\n\ntest_that(\"can drop missing values\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- data.frame(x = c(1, NA), y = c(NA, 2))\n  pv <- data_to_long(df, x:y, values_drop_na = TRUE)\n\n  expect_identical(pv$name, c(\"x\", \"y\"))\n  expect_identical(pv$value, c(1, 2))\n})\n\ntest_that(\"mixed columns are automatically coerced\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- data.frame(x = factor(\"a\"), y = factor(\"b\"))\n  pv <- data_to_long(df, x:y)\n\n  expect_identical(pv$value, factor(c(\"a\", \"b\")))\n})\n\ntest_that(\"error when overwriting existing column\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(x = 1, y = 2)\n\n  expect_error(\n    data_to_long(df, y, names_to = \"x\"),\n    regexp = \"are already present\"\n  )\n})\n\ntest_that(\"preserve date format\", {\n  skip_if_not_installed(\"tidyr\")\n\n  family <- tidyr::tibble(\n    family = 1:3,\n    dob_child1 = as.Date(c(\"1998-11-26\", \"2004-10-10\", \"2000-12-05\")),\n    dob_child2 = as.Date(c(\"2000-01-29\", NA, \"2004-04-05\"))\n  )\n\n  tidyr <- 
tidyr::pivot_longer(family, !family, names_to = \"child\")\n  datawiz <- data_to_long(family, -family, names_to = \"child\")\n\n  expect_identical(tidyr, datawiz)\n})\n\n\ntest_that(\"works with labelled data\", {\n  data(efc, package = \"datawizard\")\n  out <- data_to_long(\n    efc,\n    select = c(\"e16sex\", \"c172code\"),\n    names_to = \"Dummy\",\n    values_to = \"Score\"\n  )\n  expect_identical(nrow(out), 200L)\n  expect_identical(attributes(out$e42dep)$label, \"elder's dependency\")\n})\n\n\ntest_that(\"don't convert factors to integer\", {\n  data(\"mtcars\")\n  mtcars <- mtcars[c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 18L, 29L, 31L), ]\n  mtcars$am_f <- factor(mtcars$am)\n  mtcars$cyl_f <- factor(mtcars$cyl)\n\n  mtcars$id <- factor(seq_len(nrow(mtcars)))\n  mtcars_long <- data_to_long(\n    mtcars,\n    select = c(\"mpg\", \"qsec\", \"disp\"),\n    names_to = \"g\"\n  )\n  expect_snapshot(print(mtcars_long))\n})\n\n\ntest_that(\"tell user about typos\", {\n  data(\"mtcars\")\n  expect_silent(data_to_long(\n    mtcars,\n    select = c(\"mpg\", \"hp\", \"disp\"),\n    names_to = \"time\",\n    values_to = \"count\"\n  ))\n  expect_error(\n    data_to_long(\n      mtcars,\n      select = c(\"mpg\", \"ho\", \"dist\"),\n      names_to = \"time\",\n      values_to = \"count\"\n    ),\n    regex = \"Following\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_to_numeric.R",
    "content": "test_that(\"convert data frame to numeric\", {\n  expect_snapshot(to_numeric(head(ToothGrowth), dummy_factors = TRUE))\n  expect_snapshot(to_numeric(head(ToothGrowth), dummy_factors = FALSE))\n})\n\ntest_that(\"convert character to numeric\", {\n  expect_identical(to_numeric(c(\"xyz\", \"ab\")), c(2, 1))\n})\n\ntest_that(\"convert character to numeric Date\", {\n  expect_warning(expect_identical(\n    to_numeric(as.Date(\"2022-01-01\")),\n    as.numeric(as.Date(\"2022-01-01\"))\n  ))\n  expect_warning(expect_identical(\n    to_numeric(as.POSIXct(\"2022-01-01\")),\n    as.numeric(as.POSIXct(\"2022-01-01\"))\n  ))\n  expect_warning(expect_identical(\n    to_numeric(as.POSIXlt(\"2022-01-01\")),\n    as.numeric(as.POSIXlt(\"2022-01-01\"))\n  ))\n})\n\ntest_that(\"convert character to numeric preserve levels\", {\n  x <- head(as.factor(mtcars$gear))\n  expect_identical(\n    to_numeric(x, dummy_factors = FALSE),\n    c(2, 2, 2, 1, 1, 1)\n  )\n  expect_identical(\n    to_numeric(x, dummy_factors = FALSE, preserve_levels = TRUE),\n    c(4, 4, 4, 3, 3, 3)\n  )\n})\n\ntest_that(\"convert character to numeric lowest\", {\n  d <- head(mtcars)\n  d$vs <- as.factor(d$vs)\n  model <- glm(vs ~ wt + mpg, data = d, family = \"binomial\")\n  expect_identical(\n    to_numeric(insight::get_response(model), dummy_factors = FALSE),\n    c(1, 1, 2, 2, 1, 2)\n  )\n  expect_identical(\n    to_numeric(insight::get_response(model), dummy_factors = FALSE, lowest = 0),\n    c(0, 0, 1, 1, 0, 1)\n  )\n})\n\ntest_that(\"convert factor to numeric\", {\n  f <- factor(substring(\"statistics\", 1:10, 1:10))\n  expect_snapshot(to_numeric(f, dummy_factors = TRUE))\n})\n\ntest_that(\"convert factor to numeric\", {\n  expect_identical(to_numeric(c(\"abc\", \"xyz\")), c(1, 2))\n  expect_identical(to_numeric(c(\"123\", \"789\")), c(123, 789))\n  expect_identical(to_numeric(c(\"1L\", \"2e-3\")), c(1, 0.002))\n  expect_identical(to_numeric(c(\"1L\", \"2e-3\", \"ABC\")), c(1, 2, 
3))\n})\n\ntest_that(\"convert factor to numeric, dummy factors\", {\n  expect_identical(\n    to_numeric(c(\"abc\", \"xyz\"), dummy_factors = TRUE),\n    data.frame(abc = c(1, 0), xyz = c(0, 1)),\n    ignore_attr = TRUE\n  )\n  expect_identical(\n    to_numeric(c(\"1L\", \"2e-3\", \"ABC\"), dummy_factors = TRUE),\n    data.frame(`1L` = c(1, 0, 0), `2e-3` = c(0, 1, 0), ABC = c(0, 0, 1)),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"convert factor to numeric, append\", {\n  data(efc)\n  expect_identical(\n    colnames(to_numeric(efc, dummy_factors = TRUE)),\n    c(\n      \"c12hour\",\n      \"e16sex\",\n      \"e42dep.1\",\n      \"e42dep.2\",\n      \"e42dep.3\",\n      \"e42dep.4\",\n      \"c172code\",\n      \"neg_c_7\"\n    ),\n    ignore_attr = TRUE\n  )\n  expect_identical(\n    colnames(to_numeric(efc, dummy_factors = TRUE, append = TRUE)),\n    c(\n      \"c12hour\",\n      \"e16sex\",\n      \"e42dep\",\n      \"c172code\",\n      \"neg_c_7\",\n      \"e42dep_n\",\n      \"e42dep_n.1\",\n      \"e42dep_n.2\",\n      \"e42dep_n.3\",\n      \"e42dep_n.4\"\n    ),\n    ignore_attr = TRUE\n  )\n  expect_identical(\n    colnames(to_numeric(efc, append = TRUE, dummy_factors = FALSE)),\n    c(\"c12hour\", \"e16sex\", \"e42dep\", \"c172code\", \"neg_c_7\", \"e42dep_n\"),\n    ignore_attr = TRUE\n  )\n  expect_identical(\n    colnames(to_numeric(efc, append = FALSE, dummy_factors = FALSE)),\n    c(\"c12hour\", \"e16sex\", \"e42dep\", \"c172code\", \"neg_c_7\"),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"convert factor to numeric, all numeric\", {\n  data(mtcars)\n  expect_identical(to_numeric(mtcars), mtcars)\n})\n\ntest_that(\"convert factor to numeric, dummy factors, with NA\", {\n  x1 <- factor(rep(c(\"a\", \"b\"), 3))\n  x2 <- factor(c(\"a\", NA_character_, \"a\", \"b\", \"a\", \"b\"))\n  x3 <- factor(c(NA_character_, \"b\", \"a\", \"b\", \"a\", \"b\"))\n  x4 <- factor(c(\"a\", \"b\", \"a\", \"b\", \"a\", NA_character_))\n  x5 <- 
factor(c(NA_character_, \"b\", \"a\", \"b\", \"a\", NA_character_))\n  x6 <- factor(c(NA_character_, \"b\", NA_character_, \"b\", \"a\", NA_character_))\n  x7 <- factor(c(\n    NA_character_,\n    \"b\",\n    \"a\",\n    \"b\",\n    \"a\",\n    \"b\",\n    NA_character_,\n    \"b\",\n    \"a\",\n    NA_character_,\n    \"a\",\n    \"b\",\n    \"a\",\n    \"b\",\n    \"a\",\n    NA_character_\n  ))\n\n  # same observations are missing\n  expect_identical(\n    which(!complete.cases(to_numeric(x1, dummy_factors = TRUE))),\n    which(is.na(x1))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x2, dummy_factors = TRUE))),\n    which(is.na(x2))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x3, dummy_factors = TRUE))),\n    which(is.na(x3))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x4, dummy_factors = TRUE))),\n    which(is.na(x4))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x5, dummy_factors = TRUE))),\n    which(is.na(x5))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x6, dummy_factors = TRUE))),\n    which(is.na(x6))\n  )\n  expect_identical(\n    which(!complete.cases(to_numeric(x7, dummy_factors = TRUE))),\n    which(is.na(x7))\n  )\n\n  # output has same number of observation as input\n  expect_identical(nrow(to_numeric(x1, dummy_factors = TRUE)), length(x1))\n  expect_identical(nrow(to_numeric(x2, dummy_factors = TRUE)), length(x2))\n  expect_identical(nrow(to_numeric(x3, dummy_factors = TRUE)), length(x3))\n  expect_identical(nrow(to_numeric(x4, dummy_factors = TRUE)), length(x4))\n  expect_identical(nrow(to_numeric(x5, dummy_factors = TRUE)), length(x5))\n  expect_identical(nrow(to_numeric(x6, dummy_factors = TRUE)), length(x6))\n  expect_identical(nrow(to_numeric(x7, dummy_factors = TRUE)), length(x7))\n})\n\ntest_that(\"to_numeric, inverse factor levels\", {\n  f <- c(0, 0, 1, 1, 1, 0)\n  x1 <- factor(f, levels = c(0, 1))\n  x2 <- factor(f, levels = c(1, 0))\n  out <- 
to_numeric(x1, dummy_factors = FALSE, preserve_levels = FALSE)\n  expect_identical(out, c(1, 1, 2, 2, 2, 1))\n  out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = FALSE)\n  expect_identical(out, c(2, 2, 1, 1, 1, 2))\n  out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = TRUE)\n  expect_identical(out, c(0, 0, 1, 1, 1, 0))\n  out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = TRUE)\n  expect_identical(out, c(1, 1, 0, 0, 0, 1))\n})\n\n# select helpers ------------------------------\ntest_that(\"to_numeric regex\", {\n  expect_identical(\n    to_numeric(mtcars, select = \"pg\", regex = TRUE),\n    to_numeric(mtcars, select = \"mpg\")\n  )\n})\n\n\ntest_that(\"to_numeric works with haven_labelled, convert many labels correctly\", {\n  skip_on_cran()\n  skip_if_not_installed(\"httr\")\n  skip_if_not_installed(\"haven\")\n  skip_if_not_installed(\"withr\")\n  skip_if_not_installed(\"curl\")\n  skip_if_offline()\n\n  withr::with_tempfile(\"temp_file\", fileext = \".sav\", code = {\n    request <- httr::GET(\n      \"https://raw.github.com/easystats/circus/main/data/EFC.sav\"\n    )\n    httr::stop_for_status(request)\n    writeBin(httr::content(request, type = \"raw\"), temp_file)\n\n    d <- haven::read_spss(temp_file)\n    x <- to_numeric(d$c172code)\n    expect_identical(as.vector(table(x)), c(180L, 506L, 156L))\n  })\n})\n\n\ntest_that(\"to_numeric preserves correct label order\", {\n  x <- factor(c(1, 2, 3, 4))\n  x <- assign_labels(x, values = c(\"one\", \"two\", \"three\", \"four\"))\n  out <- to_numeric(x, dummy_factors = FALSE)\n  expect_identical(\n    attributes(out)$labels,\n    c(one = 1, two = 2, three = 3, four = 4)\n  )\n  # correctly reverse scale\n  out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)\n  expect_identical(\n    attributes(out)$labels,\n    c(one = 4, two = 3, three = 2, four = 1)\n  )\n  # factor with alphabetical values\n  x <- factor(letters[1:4])\n  x <- assign_labels(x, values = c(\"one\", 
\"two\", \"three\", \"four\"))\n  out <- to_numeric(x, dummy_factors = FALSE)\n  expect_identical(\n    attributes(out)$labels,\n    c(one = 1, two = 2, three = 3, four = 4)\n  )\n  # correctly reverse scale\n  out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)\n  expect_identical(\n    attributes(out)$labels,\n    c(one = 4, two = 3, three = 2, four = 1)\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_to_wide.R",
    "content": "test_that(\"data_to_wide works\", {\n  long_data <- data.frame(\n    Row_ID = c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5),\n    name = c(\n      \"X1\",\n      \"X1\",\n      \"X1\",\n      \"X1\",\n      \"X1\",\n      \"X2\",\n      \"X2\",\n      \"X2\",\n      \"X2\",\n      \"X2\",\n      \"X3\",\n      \"X3\",\n      \"X3\",\n      \"X3\",\n      \"X3\"\n    ),\n    value = c(3L, 2L, 5L, 4L, 1L, 3L, 1L, 2L, 5L, 4L, 2L, 3L, 1L, 4L, 5L),\n    stringsAsFactors = FALSE\n  )\n\n  expect_equal(\n    data_to_wide(\n      long_data,\n      names_from = \"name\",\n      values_from = \"value\",\n      id_cols = \"Row_ID\"\n    ),\n    data.frame(\n      Row_ID = c(1, 2, 3, 4, 5),\n      X1 = c(3L, 2L, 5L, 4L, 1L),\n      X2 = c(3L, 1L, 2L, 5L, 4L),\n      X3 = c(2L, 3L, 1L, 4L, 5L),\n      stringsAsFactors = FALSE\n    ),\n    ignore_attr = TRUE,\n    tolerance = 1e-3\n  )\n\n  long_data$X1 <- 5\n  expect_error(\n    data_to_wide(\n      long_data,\n      names_from = \"name\",\n      values_from = \"value\",\n      id_cols = \"Row_ID\"\n    ),\n    regexp = \"Some values of the columns specified in `names_from`\"\n  )\n})\n\n\ntest_that(\"data_to_wide, names_prefix works\", {\n  skip_if_not_installed(\"tidyr\")\n\n  out <- data_to_wide(\n    tidyr::fish_encounters,\n    names_from = \"station\",\n    values_from = \"seen\",\n    names_prefix = \"foo_\"\n  )\n\n  expect_named(\n    out,\n    c(\n      \"fish\",\n      \"foo_Release\",\n      \"foo_I80_1\",\n      \"foo_Lisbon\",\n      \"foo_Rstr\",\n      \"foo_Base_TD\",\n      \"foo_BCE\",\n      \"foo_BCW\",\n      \"foo_BCE2\",\n      \"foo_BCW2\",\n      \"foo_MAE\",\n      \"foo_MAW\"\n    )\n  )\n})\n\n\ntest_that(\"data_to_wide, values_fill deprecated\", {\n  skip_if_not_installed(\"tidyr\")\n\n  expect_warning(\n    data_to_wide(\n      tidyr::fish_encounters,\n      names_from = \"station\",\n      values_from = \"seen\",\n      values_fill = c(1, 2)\n    ),\n    regexp = \"`values_fill` 
is defunct\",\n    fixed = TRUE\n  )\n})\n\n\n# EQUIVALENCE WITH TIDYR - PIVOT_WIDER -----------------------------------------------\n\n# Examples coming from: https://tidyr.tidyverse.org/articles/pivot.html#wider\n# and from https://github.com/tidyverse/tidyr/blob/main/tests/testthat/test-pivot-wide.R\n\n### From tidyr tests\n\ntest_that(\"can pivot all cols to wide\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(key = c(\"x\", \"y\", \"z\"), val = 1:3)\n  pv <- data_to_wide(df, names_from = \"key\", values_from = \"val\")\n\n  expect_named(pv, c(\"x\", \"y\", \"z\"))\n  expect_identical(nrow(pv), 1L)\n})\n\ntest_that(\"non-pivoted cols are preserved\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(a = 1, key = c(\"x\", \"y\"), val = 1:2)\n  pv <- data_to_wide(df, names_from = \"key\", values_from = \"val\")\n\n  expect_named(pv, c(\"a\", \"x\", \"y\"))\n  expect_identical(nrow(pv), 1L)\n})\n\ntest_that(\"implicit missings turn into explicit missings\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(a = 1:2, key = c(\"x\", \"y\"), val = 1:2)\n  pv <- data_to_wide(df, names_from = \"key\", values_from = \"val\")\n\n  expect_identical(pv$a, c(1L, 2L))\n  expect_identical(pv$x, c(1L, NA))\n  expect_identical(pv$y, c(NA, 2L))\n})\n\ntest_that(\"error when overwriting existing column\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- tidyr::tibble(\n    a = c(1, 1),\n    key = c(\"a\", \"b\"),\n    val = c(1, 2)\n  )\n\n  expect_error(\n    data_to_wide(df, names_from = \"key\", values_from = \"val\"),\n    regexp = \"Some values of the columns specified\"\n  )\n})\n\ntest_that(\"data_to_wide: fill values, #293\", {\n  skip_if_not_installed(\"tidyr\")\n\n  weekdays <- c(\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\")\n\n  daily <- tidyr::tibble(\n    day = factor(c(\"Tue\", \"Thu\", \"Fri\", \"Mon\"), levels = weekdays),\n    value = c(2, 3, 1, 5),\n    type = factor(c(\"A\", \"B\", \"B\", \"A\"))\n  
)\n\n  expect_identical(\n    tidyr::pivot_wider(\n      daily,\n      names_from = type,\n      values_from = value\n    ),\n    data_to_wide(\n      daily,\n      names_from = \"type\",\n      values_from = \"value\"\n    )\n  )\n})\n\ntest_that(\"data_to_wide, id_cols works correctly, #293\", {\n  skip_if_not_installed(\"tidyr\")\n\n  updates <- tidyr::tibble(\n    county = c(\"Wake\", \"Wake\", \"Wake\", \"Guilford\", \"Guilford\"),\n    date = c(as.Date(\"2020-01-01\") + 0:2, as.Date(\"2020-01-03\") + 0:1),\n    system = c(\"A\", \"B\", \"C\", \"A\", \"C\"),\n    value = c(3.2, 4, 5.5, 2, 1.2)\n  )\n\n  expect_identical(\n    tidyr::pivot_wider(\n      updates,\n      id_cols = county,\n      names_from = system,\n      values_from = value\n    ),\n    data_to_wide(\n      updates,\n      id_cols = \"county\",\n      names_from = \"system\",\n      values_from = \"value\"\n    )\n  )\n})\n\n\n### Examples from tidyr website\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 1\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::pivot_wider(\n    tidyr::fish_encounters,\n    names_from = \"station\",\n    values_from = \"seen\"\n  )\n\n  y <- data_to_wide(\n    tidyr::fish_encounters,\n    names_from = \"station\",\n    values_from = \"seen\"\n  )\n\n  expect_equal(x, y, ignore_attr = TRUE)\n})\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 2\", {\n  skip_if_not_installed(\"tidyr\")\n\n  production <- tidyr::expand_grid(\n    product = c(\"A\", \"B\"),\n    country = c(\"AI\", \"EI\"),\n    year = 2000:2014\n  ) %>%\n    data_filter((product == \"A\" & country == \"AI\") | product == \"B\")\n\n  production$production <- rnorm(nrow(production))\n\n  x <- production %>%\n    tidyr::pivot_wider(\n      names_from = c(product, country),\n      values_from = production\n    )\n\n  y <- production %>%\n    data_to_wide(\n      names_from = c(\"product\", \"country\"),\n      values_from = \"production\"\n    )\n\n  expect_identical(x, 
y)\n})\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 3\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::us_rent_income %>%\n    tidyr::pivot_wider(\n      names_from = variable,\n      values_from = c(estimate, moe)\n    )\n\n  y <- tidyr::us_rent_income %>%\n    data_to_wide(\n      names_from = \"variable\",\n      values_from = c(\"estimate\", \"moe\")\n    )\n\n  expect_identical(x, y)\n})\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 4\", {\n  skip_if_not_installed(\"tidyr\")\n\n  x <- tidyr::us_rent_income %>%\n    tidyr::pivot_wider(\n      names_from = variable,\n      names_sep = \".\",\n      values_from = c(estimate, moe)\n    )\n\n  y <- tidyr::us_rent_income %>%\n    data_to_wide(\n      names_from = \"variable\",\n      names_sep = \".\",\n      values_from = c(\"estimate\", \"moe\")\n    )\n\n  expect_identical(x, y)\n})\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 5\", {\n  skip_if_not_installed(\"tidyr\")\n\n  contacts <- tidyr::tribble(\n    ~field, ~value,\n    \"name\", \"Jiena McLellan\",\n    \"company\", \"Toyota\",\n    \"name\", \"John Smith\",\n    \"company\", \"google\",\n    \"email\", \"john@google.com\",\n    \"name\", \"Huxley Ratcliffe\"\n  )\n  contacts$person_id <- cumsum(contacts$field == \"name\")\n\n  x <- tidyr::pivot_wider(contacts, names_from = field, values_from = value)\n  y <- data_to_wide(contacts, names_from = \"field\", values_from = \"value\")\n\n  expect_identical(x, y)\n})\n\n\ntest_that(\"data_to_wide equivalent to pivot_wider: ex 6\", {\n  skip_if_not_installed(\"tidyr\")\n\n  production <- tidyr::expand_grid(\n    product = c(\"A\", \"B\"),\n    country = c(\"AI\", \"EI\"),\n    year = 2000:2014\n  ) %>%\n    data_filter((product == \"A\" & country == \"AI\") | product == \"B\")\n\n  production$production <- rnorm(nrow(production))\n\n  x <- production %>%\n    tidyr::pivot_wider(\n      names_from = c(product, country),\n      values_from = production,\n      
names_glue = \"prod_{product}_{country}\"\n    )\n\n  y <- production %>%\n    data_to_wide(\n      names_from = c(\"product\", \"country\"),\n      values_from = \"production\",\n      names_glue = \"prod_{product}_{country}\"\n    )\n\n  expect_identical(x, y)\n})\n\n\ntest_that(\"data_to_wide, names_glue works\", {\n  skip_if_not_installed(\"tidyr\")\n\n  df <- data.frame(\n    food = c(\n      \"banana\",\n      \"banana\",\n      \"banana\",\n      \"banana\",\n      \"cheese\",\n      \"cheese\",\n      \"cheese\",\n      \"cheese\"\n    ),\n    binary = rep(c(\"yes\", \"no\"), 4),\n    car = c(\n      \"toyota\",\n      \"subaru\",\n      \"mazda\",\n      \"skoda\",\n      \"toyota\",\n      \"subaru\",\n      \"mazda\",\n      \"skoda\"\n    ),\n    fun = c(2, 4, 3, 6, 2, 4, 2, 3),\n    stringsAsFactors = FALSE\n  )\n\n  x <- df %>%\n    tidyr::pivot_wider(\n      id_cols = food,\n      names_from = c(car, binary),\n      names_glue = \"{binary}_{car}\",\n      values_from = fun\n    )\n\n  y <- df %>%\n    data_to_wide(\n      id_cols = \"food\",\n      names_from = c(\"car\", \"binary\"),\n      names_glue = \"{binary}_{car}\",\n      values_from = \"fun\"\n    )\n\n  expect_identical(x, y, ignore_attr = TRUE)\n})\n\n\ntest_that(\"preserve date format\", {\n  skip_if_not_installed(\"tidyr\")\n\n  family <- tidyr::tibble(\n    family = c(1L, 1L, 2L, 2L, 3L, 3L),\n    child = c(\n      \"dob_child1\",\n      \"dob_child2\",\n      \"dob_child1\",\n      \"dob_child2\",\n      \"dob_child1\",\n      \"dob_child2\"\n    ),\n    value = as.Date(c(\n      \"1998-11-26\",\n      \"2000-01-29\",\n      \"2004-10-10\",\n      NA,\n      \"2000-12-05\",\n      \"2004-04-05\"\n    ))\n  )\n\n  tidyr <- tidyr::pivot_wider(\n    family,\n    names_from = \"child\",\n    values_from = \"value\"\n  )\n  datawiz <- data_to_wide(family, names_from = \"child\", values_from = \"value\")\n\n  expect_identical(tidyr, datawiz)\n})\n\n\ntest_that(\"#293\", {\n  
skip_if_not_installed(\"tidyr\")\n\n  weekdays <- c(\"Mon\", \"Tue\", \"Wed\", \"Thu\", \"Fri\", \"Sat\", \"Sun\")\n\n  daily <- tidyr::tibble(\n    day = factor(c(\"Tue\", \"Thu\", \"Fri\", \"Mon\"), levels = weekdays),\n    value = c(2, 3, 1, 5)\n  )\n\n  expect_identical(\n    tidyr::pivot_wider(daily, names_from = day, values_from = value),\n    data_to_wide(daily, names_from = \"day\", values_from = \"value\")\n  )\n})\n\n\ntest_that(\"new names starting with digits are not corrected automatically\", {\n  skip_if_not_installed(\"tidyr\")\n\n  percentages <- tidyr::tibble(\n    year = c(2018, 2019, 2020, 2020),\n    type = factor(c(\"A\", \"B\", \"A\", \"B\"), levels = c(\"A\", \"B\")),\n    percentage = c(100, 100, 40, 60)\n  )\n\n  tidyr <- tidyr::pivot_wider(\n    percentages,\n    names_from = c(year, type),\n    values_from = percentage\n  )\n  datawiz <- data_to_wide(\n    percentages,\n    names_from = c(\"year\", \"type\"),\n    values_from = \"percentage\"\n  )\n  expect_identical(tidyr, datawiz)\n})\n\n\ntest_that(\"Preserve column name when names_from column only has one unique value\", {\n  d <- data.frame(\n    Value = rnorm(10),\n    Level = paste0(\"Participant_\", 1:10),\n    Parameter = \"Intercept\",\n    stringsAsFactors = FALSE\n  )\n  out <- data_to_wide(\n    d,\n    values_from = \"Value\",\n    names_from = \"Parameter\",\n    names_sep = \"_\"\n  )\n  expect_named(out, c(\"Level\", \"Intercept\"))\n  expect_identical(nrow(out), 10L)\n\n  d <- data.frame(\n    Value = rnorm(10),\n    Level = paste0(\"Participant_\", 1:10),\n    Parameter = c(\"Intercept\", \"abc\"),\n    stringsAsFactors = FALSE\n  )\n  out <- data_to_wide(\n    d,\n    values_from = \"Value\",\n    names_from = \"Parameter\",\n    names_sep = \"_\"\n  )\n  expect_named(out, c(\"Level\", \"Intercept\", \"abc\"))\n  expect_identical(nrow(out), 10L)\n})\n\n\ntest_that(\"data_to_wide with multiple values_from and unbalanced panel\", {\n  skip_if_not_installed(\"tidyr\")\n\n 
 long_df <- tidyr::tibble(\n    subject_id = c(1, 1, 2, 2, 3, 5, 4, 4),\n    time = rep(c(1, 2), 4),\n    score = c(10, NA, 15, 12, 18, 11, NA, 14),\n    anxiety = c(5, 7, 6, NA, 8, 4, 5, NA)\n  )\n\n  tidyr <- tidyr::pivot_wider(\n    long_df,\n    id_cols = \"subject_id\",\n    names_from = time,\n    values_from = c(score, anxiety)\n  )\n  datawiz <- data_to_wide(\n    long_df,\n    id_cols = \"subject_id\",\n    names_from = \"time\",\n    values_from = c(\"score\", \"anxiety\")\n  )\n  expect_identical(tidyr, datawiz)\n})\n\n\ntest_that(\"data_to_wide preserves empty columns\", {\n  long_df <- data.frame(\n    subject_id = c(1, 1, 2, 2, 3, 5, 4, 4),\n    time = rep(c(1, 2), 4),\n    score = c(10, NA, 15, 12, 18, 11, NA, 14),\n    anxiety = c(5, 7, 6, NA, 8, 4, 5, NA),\n    test = rep(NA_real_, 8)\n  )\n\n  out <- data_to_wide(\n    long_df,\n    id_cols = \"subject_id\",\n    names_from = \"time\",\n    values_from = c(\"score\", \"anxiety\", \"test\")\n  )\n\n  expect_equal(\n    out,\n    data.frame(\n      subject_id = c(1, 2, 3, 5, 4),\n      score_1 = c(10, 15, 18, NA, NA),\n      score_2 = c(NA, 12, NA, 11, 14),\n      anxiety_1 = c(5, 6, 8, NA, 5),\n      anxiety_2 = c(7, NA, NA, 4, NA),\n      test_1 = as.double(c(NA, NA, NA, NA, NA)),\n      test_2 = as.double(c(NA, NA, NA, NA, NA))\n    ),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"data_to_wide, check for valid columns\", {\n  long_df <- data.frame(\n    subject_id = c(1, 1, 2, 2, 3, 5, 4, 4),\n    time = rep(c(1, 2), 4),\n    score = c(10, NA, 15, 12, 18, 11, NA, 14),\n    anxiety = c(5, 7, 6, NA, 8, 4, 5, NA),\n    test = rep(NA_real_, 8)\n  )\n\n  expect_error(\n    data_to_wide(\n      long_df,\n      id_cols = \"id\",\n      names_from = \"time\",\n      values_from = c(\"score\", \"anxiety\", \"test\")\n    ),\n    regexp = \"`id_cols` must be the names of\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    data_to_wide(\n      long_df,\n      id_cols = \"subject_id\",\n      names_from = 
\"times\",\n      values_from = c(\"score\", \"anxiety\", \"test\")\n    ),\n    regexp = \"`names_from` must be the name of\",\n    fixed = TRUE\n  )\n\n  expect_warning(\n    data_to_wide(\n      long_df,\n      id_cols = \"subject_id\",\n      names_from = \"time\",\n      values_from = c(\"scores\", \"anxiety\", \"test\")\n    ),\n    regexp = \"Following variable(s) were not found\",\n    fixed = TRUE\n  )\n\n  expect_error(\n    expect_warning(expect_warning(expect_warning(\n      data_to_wide(\n        long_df,\n        id_cols = \"subject_id\",\n        names_from = \"time\",\n        values_from = c(\"a\", \"b\", \"c\")\n      )\n    ))),\n    regexp = \"No variable defined\",\n    fixed = TRUE\n  )\n})\n\n\ntest_that(\"data_to_wide, select helper for values_from\", {\n  long_df <- data.frame(\n    subject_id = c(1, 1, 2, 2, 3, 5, 4, 4),\n    time = rep(c(1, 2), 4),\n    score_a = c(10, NA, 15, 12, 18, 11, NA, 14),\n    score_b = c(5, 7, 6, NA, 8, 4, 5, NA),\n    score_c = rep(NA_real_, 8)\n  )\n\n  out <- data_to_wide(\n    long_df,\n    id_cols = \"subject_id\",\n    names_from = \"time\",\n    values_from = starts_with(\"score_\")\n  )\n\n  expect_equal(\n    out,\n    data.frame(\n      subject_id = c(1, 2, 3, 5, 4),\n      score_a_1 = c(10, 15, 18, NA, NA),\n      score_a_2 = c(NA, 12, NA, 11, 14),\n      score_b_1 = c(5, 6, 8, NA, 5),\n      score_b_2 = c(7, NA, NA, 4, NA),\n      score_c_1 = as.double(c(NA, NA, NA, NA, NA)),\n      score_c_2 = as.double(c(NA, NA, NA, NA, NA))\n    ),\n    ignore_attr = TRUE\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_unique.R",
    "content": "# Preparations\n\ndf1 <- data.frame(\n  id = c(1, 2, 3, 1, 3),\n  year = c(2022, 2022, 2022, 2022, 2000),\n  item1 = c(NA, 1, 1, 2, 3),\n  item2 = c(NA, 1, 1, 2, 3),\n  item3 = c(NA, 1, 1, 2, 3)\n)\n\nexpected1 <- data.frame(\n  id = c(1, 2, 3),\n  year = c(2022, 2022, 2022),\n  item1 = c(2, 1, 1),\n  item2 = c(2, 1, 1),\n  item3 = c(2, 1, 1)\n)\n\nexpected2 <- data.frame(\n  id = c(1, 2, 3),\n  year = c(2022, 2022, 2022),\n  item1 = c(NA, 1, 1),\n  item2 = c(NA, 1, 1),\n  item3 = c(NA, 1, 1)\n)\n\nexpected3 <- data.frame(\n  id = c(1, 2, 3),\n  year = c(2022, 2022, 2000),\n  item1 = c(2, 1, 3),\n  item2 = c(2, 1, 3),\n  item3 = c(2, 1, 3)\n)\n\nexpected4 <- data.frame(\n  id = c(1, 2, 3, 3),\n  year = c(2022, 2022, 2022, 2000),\n  item1 = c(2, 1, 1, 3),\n  item2 = c(2, 1, 1, 3),\n  item3 = c(2, 1, 1, 3)\n)\n\n# Testing\n\ntest_that(\"data_unique returns original data if no duplicates\", {\n  test <- data.frame(x = c(1, 2), y = c(3, 4))\n  expect_identical(\n    data_unique(test, c(\"x\", \"y\"), verbose = FALSE),\n    test\n  )\n  expect_identical(\n    data_unique(test, \"x\", verbose = FALSE),\n    test\n  )\n})\n\ntest_that(\"data_unique basic\", {\n  expect_identical(\n    data_unique(df1, select = \"id\", verbose = FALSE),\n    expected1\n  )\n})\n\ntest_that(\"data_unique basic method best\", {\n  expect_identical(\n    data_unique(df1, select = \"id\", keep = \"best\", verbose = FALSE),\n    expected1\n  )\n})\n\ntest_that(\"data_unique basic method first\", {\n  expect_identical(\n    data_unique(df1, select = \"id\", keep = \"first\", verbose = FALSE),\n    expected2\n  )\n})\n\ntest_that(\"data_unique basic method last\", {\n  expect_identical(\n    data_unique(df1, select = \"id\", keep = \"last\", verbose = FALSE),\n    expected3\n  )\n})\n\ntest_that(\"data_unique unquoted\", {\n  expect_identical(\n    data_unique(df1, select = id, verbose = FALSE),\n    expected1\n  )\n})\n\ntest_that(\"data_unique vector\", {\n  expect_identical(\n  
  data_unique(df1, select = 1, verbose = FALSE),\n    expected1\n  )\n})\n\ntest_that(\"data_unique select-helper\", {\n  expect_identical(\n    data_unique(df1, select = contains(\"id\"), verbose = FALSE),\n    expected1\n  )\n})\n\ntest_that(\"data_unique multiple IDs\", {\n  x <- data_unique(df1, select = c(\"id\", \"year\"), verbose = FALSE)\n  rownames(x) <- NULL\n  expect_identical(\n    x,\n    expected4\n  )\n})\n\ntest_that(\"data_unique multiple IDs formula\", {\n  x <- data_unique(df1, select = ~ id + year, verbose = FALSE)\n  rownames(x) <- NULL\n  expect_identical(\n    x,\n    expected4\n  )\n})\n\ntest_that(\"data_unique multiple IDs vector\", {\n  x <- data_unique(df1, select = 1:2, verbose = FALSE)\n  rownames(x) <- NULL\n  expect_identical(\n    x,\n    expected4\n  )\n})\n\ntest_that(\"data_unique preserve attributes\", {\n  attr(df1, \"testing\") <- \"custom.attribute\"\n  x <- attributes(data_unique(df1, id, verbose = FALSE))\n  expect_identical(\n    x$testing,\n    \"custom.attribute\"\n  )\n})\n\ntest_that(\"data_unique, arg 'verbose' works\", {\n  expect_message(\n    data_unique(df1, select = ~ id + year),\n    \"removed, with method\"\n  )\n})\n\ntest_that(\"data_unique works with groups\", {\n  df <- data.frame(\n    g = c(1, 1, 2, 2),\n    x = c(1, 1, 2, 1)\n  )\n  df <- data_group(df, \"g\")\n\n  expected <- data.frame(\n    g = c(1, 2, 2),\n    x = c(1, 2, 1)\n  )\n  expected <- data_group(expected, \"g\")\n\n  x <- data_unique(df, \"x\", verbose = FALSE)\n  expect_identical(x, expected, ignore_attr = TRUE)\n\n  y <- attributes(x)\n\n  expect_identical(attributes(df)$class, y$class)\n  expect_identical(attributes(df)$groups, y$groups)\n})\n"
  },
  {
    "path": "tests/testthat/test-data_unite.R",
    "content": "d_unite <- data.frame(\n  x = c(NA, 1:3),\n  y = c(letters[1:3], NA_character_),\n  z = 6:9,\n  m = c(\"X\", NA_character_, \"Y\", \"Z\"),\n  n = c(\"NATION\", \"COUNTRY\", \"NATION\", NA_character_),\n  stringsAsFactors = FALSE\n)\n\n\n# for following tests, we need to check for correct column names,\n# and correct values in new variable\n\ntest_that(\"data_unite: simple use case\", {\n  # basic\n  out <- data_unite(d_unite, new_column = \"xyz\")\n  expect_identical(colnames(out), \"xyz\")\n  expect_identical(\n    out$xyz,\n    c(\"NA_a_6_X_NATION\", \"1_b_7_NA_COUNTRY\", \"2_c_8_Y_NATION\", \"3_NA_9_Z_NA\")\n  )\n  # use existing column name\n  out <- data_unite(d_unite, new_column = \"x\")\n  expect_identical(colnames(out), \"x\")\n  expect_identical(\n    out$x,\n    c(\"NA_a_6_X_NATION\", \"1_b_7_NA_COUNTRY\", \"2_c_8_Y_NATION\", \"3_NA_9_Z_NA\")\n  )\n  # select\n  out <- data_unite(d_unite, new_column = \"xyz\", select = c(\"x\", \"n\"))\n  expect_identical(\n    colnames(out),\n    c(setdiff(colnames(d_unite), c(\"x\", \"n\")), \"xyz\")\n  )\n  expect_identical(\n    out$xyz,\n    c(\"NA_NATION\", \"1_COUNTRY\", \"2_NATION\", \"3_NA\")\n  )\n  # select, use existing column name\n  out <- data_unite(d_unite, new_column = \"x\", select = c(\"x\", \"n\"))\n  expect_identical(\n    colnames(out),\n    c(setdiff(colnames(d_unite), c(\"x\", \"n\")), \"x\")\n  )\n  expect_identical(\n    out$x,\n    c(\"NA_NATION\", \"1_COUNTRY\", \"2_NATION\", \"3_NA\")\n  )\n})\n\n\ntest_that(\"data_unite: remove_na\", {\n  # basic\n  out <- data_unite(d_unite, new_column = \"xyz\", remove_na = TRUE)\n  expect_identical(colnames(out), \"xyz\")\n  expect_identical(\n    out$xyz,\n    c(\"a_6_X_NATION\", \"1_b_7_COUNTRY\", \"2_c_8_Y_NATION\", \"3_9_Z\")\n  )\n  # use existing column name\n  out <- data_unite(d_unite, new_column = \"x\", remove_na = TRUE)\n  expect_identical(colnames(out), \"x\")\n  expect_identical(\n    out$x,\n    c(\"a_6_X_NATION\", 
\"1_b_7_COUNTRY\", \"2_c_8_Y_NATION\", \"3_9_Z\")\n  )\n  # select\n  out <- data_unite(\n    d_unite,\n    new_column = \"xyz\",\n    remove_na = TRUE,\n    select = c(\"x\", \"n\")\n  )\n  expect_identical(\n    colnames(out),\n    c(setdiff(colnames(d_unite), c(\"x\", \"n\")), \"xyz\")\n  )\n  expect_identical(\n    out$xyz,\n    c(\"NATION\", \"1_COUNTRY\", \"2_NATION\", \"3\")\n  )\n  # select, use existing column name\n  out <- data_unite(\n    d_unite,\n    new_column = \"x\",\n    remove_na = TRUE,\n    select = c(\"x\", \"n\")\n  )\n  expect_identical(\n    colnames(out),\n    c(setdiff(colnames(d_unite), c(\"x\", \"n\")), \"x\")\n  )\n  expect_identical(\n    out$x,\n    c(\"NATION\", \"1_COUNTRY\", \"2_NATION\", \"3\")\n  )\n})\n\n\ntest_that(\"data_unite: append\", {\n  # basic\n  out <- data_unite(d_unite, new_column = \"xyz\", append = TRUE)\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\", \"xyz\"))\n  expect_identical(\n    out$xyz,\n    c(\"NA_a_6_X_NATION\", \"1_b_7_NA_COUNTRY\", \"2_c_8_Y_NATION\", \"3_NA_9_Z_NA\")\n  )\n  # remove NA\n  out <- data_unite(\n    d_unite,\n    new_column = \"xyz\",\n    remove_na = TRUE,\n    append = TRUE\n  )\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\", \"xyz\"))\n  expect_identical(\n    out$xyz,\n    c(\"a_6_X_NATION\", \"1_b_7_COUNTRY\", \"2_c_8_Y_NATION\", \"3_9_Z\")\n  )\n  # append, using existing column name\n  expect_message({\n    out <- data_unite(d_unite, new_column = \"x\", append = TRUE)\n  })\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\"))\n  expect_identical(\n    out$x,\n    c(\"NA_a_6_X_NATION\", \"1_b_7_NA_COUNTRY\", \"2_c_8_Y_NATION\", \"3_NA_9_Z_NA\")\n  )\n  # append, using existing column name, and remove NA\n  expect_message({\n    out <- data_unite(\n      d_unite,\n      new_column = \"x\",\n      remove_na = TRUE,\n      append = TRUE\n    )\n  })\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", 
\"n\"))\n  expect_identical(\n    out$x,\n    c(\"a_6_X_NATION\", \"1_b_7_COUNTRY\", \"2_c_8_Y_NATION\", \"3_9_Z\")\n  )\n})\n\n\ntest_that(\"data_unite: combine select and append\", {\n  # basic\n  out <- data_unite(\n    d_unite,\n    new_column = \"xyz\",\n    append = TRUE,\n    select = c(\"x\", \"n\")\n  )\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\", \"xyz\"))\n  expect_identical(\n    out$xyz,\n    c(\"NA_NATION\", \"1_COUNTRY\", \"2_NATION\", \"3_NA\")\n  )\n  # remove NA\n  out <- data_unite(\n    d_unite,\n    new_column = \"xyz\",\n    remove_na = TRUE,\n    append = TRUE,\n    select = c(\"x\", \"n\")\n  )\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\", \"xyz\"))\n  expect_identical(\n    out$xyz,\n    c(\"NATION\", \"1_COUNTRY\", \"2_NATION\", \"3\")\n  )\n  # append, using existing column name\n  expect_message({\n    out <- data_unite(\n      d_unite,\n      new_column = \"x\",\n      append = TRUE,\n      select = c(\"x\", \"n\")\n    )\n  })\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\"))\n  expect_identical(\n    out$x,\n    c(\"NA_NATION\", \"1_COUNTRY\", \"2_NATION\", \"3_NA\")\n  )\n  # append, using existing column name, and remove NA\n  expect_message({\n    out <- data_unite(\n      d_unite,\n      new_column = \"x\",\n      remove_na = TRUE,\n      append = TRUE,\n      select = c(\"x\", \"n\")\n    )\n  })\n  expect_identical(colnames(out), c(\"x\", \"y\", \"z\", \"m\", \"n\"))\n  expect_identical(\n    out$x,\n    c(\"NATION\", \"1_COUNTRY\", \"2_NATION\", \"3\")\n  )\n})\n\n\ntest_that(\"data_unite: errors\", {\n  expect_error(data_unite(d_unite), regex = \"No name\")\n  expect_error(\n    data_unite(d_unite, new_column = c(\"a\", \"b\")),\n    regex = \"a single string\"\n  )\n  expect_error(\n    expect_warning(data_unite(d_unite, new_column = \"a\", select = \"huhu\")),\n    regex = \"At least\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-data_write.R",
    "content": "skip_if_not_installed(\"httr\")\nskip_if_not_installed(\"haven\")\nskip_if_not_installed(\"readr\")\n\nskip_on_cran()\n\nskip_if_not_installed(\"curl\")\nskip_if_offline()\n\n# prepare data set ---------------\n\ndata(efc)\nd <- data_filter(efc, 1:5)\nd$e42dep <- droplevels(d$e42dep)\n\n\n# data encryption with rds ------------------\n\ntest_that(\"data_write, encrypting rds files\", {\n  skip_if_not_installed(\"withr\")\n  skip_if_not_installed(\"openssl\")\n  withr::with_tempfile(\"tmp\", fileext = \".rds\", code = {\n    expect_warning(data_write(d, tmp, password = \"test\"), \"Remember\")\n\n    # no password, returns encrypted data frame\n    d2 <- data_read(tmp, verbose = FALSE)\n    expect_named(d2, \"out\")\n    expect_false(identical(d, d2))\n\n    # password, returns decrypted data frame\n    d2 <- data_read(tmp, password = \"test\")\n    expect_identical(d, d2)\n\n    # wrong password\n    expect_error(data_read(tmp, password = \"text\"), \"File does not appear\")\n\n    # invalid password arguments\n    expect_error(\n      data_read(tmp, password = c(\"test\", \"test2\")),\n      regex = \"The password must be a single\"\n    )\n    expect_error(\n      data_read(tmp, password = 123),\n      regex = \"The password must be a single\"\n    )\n    expect_error(\n      data_read(tmp, password = \"\"),\n      regex = \"The password must be a single\"\n    )\n    expect_error(\n      data_write(d, tmp, password = c(\"test\", \"test2\")),\n      regex = \"The password must be a single\"\n    )\n    expect_error(\n      data_write(d, tmp, password = 123),\n      regex = \"The password must be a single\"\n    )\n    expect_error(\n      data_write(d, tmp, password = \"\"),\n      regex = \"The password must be a single\"\n    )\n\n    # not encrypted\n    data_write(d, tmp)\n    expect_error(data_read(tmp, password = \"test\"), \"File does not appear\")\n\n    # check other decryption functions, should fail when encrypted with datawizard\n    
expect_warning(data_write(d, tmp, password = \"test\"))\n    out <- readRDS(tmp)\n    key <- openssl::sha256(charToRaw(\"test\"))\n    expect_error(openssl::aes_cbc_decrypt(out, key = key))\n\n    # check other encryption functions, should fail imported with datawizard\n    x <- serialize(d, NULL)\n    key <- openssl::sha256(charToRaw(\"test\"))\n    saveRDS(openssl::aes_cbc_encrypt(x, key = key), tmp)\n    expect_error(data_read(tmp, password = \"test\"), \"File does not appear\")\n  })\n})\n\n\n# data encryption with rdata ------------------\n\ntest_that(\"data_write, encrypting rdata files\", {\n  skip_if_not_installed(\"withr\")\n  skip_if_not_installed(\"openssl\")\n  withr::with_tempfile(\"tmp\", fileext = \".rdata\", code = {\n    expect_warning(data_write(d, tmp, password = \"test\"), \"Remember\")\n\n    # no password, returns encrypted data frame\n    d2 <- data_read(tmp, verbose = FALSE)\n    expect_named(d2, \"out\")\n\n    # password, returns decrypted data frame\n    d2 <- data_read(tmp, password = \"test\")\n    expect_identical(d, d2)\n  })\n})\n\n\n# data encryption with parquet ------------------\n\ntest_that(\"data_write, encrypting parquet files\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".parquet\", code = {\n    expect_error(\n      data_write(d, tmp, password = \"test\"),\n      \"Data encryption is not supported\"\n    )\n  })\n})\n\n\n# SPSS -------------------------------------\n\ntest_that(\"data_write, SPSS\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".sav\", code = {\n    expect_message(data_write(d, tmp))\n    d2 <- data_read(tmp, verbose = FALSE)\n    expect_equal(\n      to_factor(d, select = c(\"e16sex\", \"c172code\")),\n      d2,\n      ignore_attr = TRUE\n    )\n    # data encryption not available for SPSS etc.\n    expect_error(\n      data_write(d, tmp, password = \"test\"),\n      \"Data encryption is not supported\"\n    )\n  
})\n})\n\n\ntest_that(\"data_write, SPSS, mixed types of labelled vectors\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".sav\", code = {\n    d <- data.frame(\n      a = 1:3,\n      b = letters[1:3],\n      c = factor(letters[1:3]),\n      d = as.Date(c(\"2022-01-01\", \"2022-02-01\", \"2022-03-01\")),\n      e = c(TRUE, FALSE, FALSE),\n      stringsAsFactors = FALSE\n    )\n\n    # Date and Logical cannot be labelled\n    d$a <- assign_labels(\n      d$a,\n      variable = \"First\",\n      values = c(\"one\", \"two\", \"three\")\n    )\n    d$b <- assign_labels(d$b, variable = \"Second\", values = c(\"A\", \"B\", \"C\"))\n    d$c <- assign_labels(\n      d$c,\n      variable = \"Third\",\n      values = c(\"ey\", \"bee\", \"see\")\n    )\n\n    expect_message(data_write(d, tmp), regex = \"Preparing\")\n  })\n})\n\n\n# Stata -------------------------------------\n\ntest_that(\"data_write, Stata\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".dta\", code = {\n    data_write(d, tmp, verbose = FALSE)\n    d2 <- data_read(tmp, verbose = FALSE)\n\n    expect_equal(\n      to_factor(d, select = c(\"e16sex\", \"c172code\")),\n      d2,\n      ignore_attr = TRUE\n    )\n\n    # data encryption not available for SPSS etc.\n    expect_error(\n      data_write(d, tmp, password = \"test\"),\n      \"Data encryption is not supported\"\n    )\n  })\n})\n\n\n# csv -------------------------\n\ntest_that(\"data_write, CSV, keep numeric\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".csv\", code = {\n    data_write(d, tmp)\n    d2 <- data_read(tmp)\n\n    expect_equal(\n      to_numeric(d, dummy_factors = FALSE, preserve_levels = TRUE),\n      d2,\n      ignore_attr = TRUE\n    )\n\n    # data encryption not available for SPSS etc.\n    expect_error(\n      data_write(d, tmp, password = \"test\"),\n      \"Data encryption is not supported\"\n    )\n  
})\n})\n\ntest_that(\"data_write, CSV, convert to factor\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".csv\", code = {\n    data_write(d, tmp, convert_factors = TRUE)\n    d2 <- data_read(tmp)\n    out <- to_factor(d, select = c(\"e16sex\", \"c172code\"))\n    out$e16sex <- as.character(out$e16sex)\n    out$c172code <- as.character(out$c172code)\n    out$e42dep <- as.numeric(as.character(out$e42dep))\n    expect_equal(out, d2, ignore_attr = TRUE)\n  })\n})\n\ntest_that(\"data_write, CSV, create labels file\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".csv\", code = {\n    # file for labels\n    fpath <- dirname(tmp)\n    fname <- sub(\"\\\\.csv$\", \"\", basename(tmp))\n    tmp2 <- paste0(fpath, .Platform$file.sep, fname, \"_labels.csv\")\n    on.exit(unlink(tmp2))\n\n    data(efc)\n    expect_message(data_write(efc, tmp, save_labels = TRUE))\n    d <- data_read(tmp2)\n\n    expect_identical(d$variable[2], \"e16sex\")\n    expect_identical(d$label[2], \"elder's gender\")\n    expect_identical(d$labels[2], \"1=male; 2=female\")\n\n    expect_message(data_write(efc, tmp, save_labels = TRUE, delimiter = \";\"))\n    d <- data_read(tmp2)\n    expect_identical(d$variable[2], \"e16sex\")\n    expect_identical(d$label[2], \"elder's gender\")\n    expect_identical(d$labels[2], \"1=male; 2=female\")\n  })\n})\n\n\n# invalid file type -------------------------\n\ntest_that(\"data_write, no file extension\", {\n  expect_error(data_write(d, \"mytestfile\"))\n  expect_error(data_write(d, NULL))\n})\n\n\n# writing character vector works for missing value labels ------------------\n\ntest_that(\"data_write, existing variable label but missing value labels\", {\n  skip_if_not_installed(\"withr\")\n  withr::with_tempfile(\"tmp\", fileext = \".sav\", code = {\n    d <- data.frame(\n      a = letters[1:3],\n      stringsAsFactors = FALSE\n    )\n    d$a <- assign_labels(d$a, variable = \"First\")\n    # 
expect message, but no error\n    expect_message(data_write(d, tmp), regex = \"Preparing\")\n\n    # check if data is really the same\n    d2 <- data_read(tmp, verbose = FALSE)\n    expect_identical(d2, d)\n  })\n})\n"
  },
  {
    "path": "tests/testthat/test-demean.R",
    "content": "test_that(\"demean works\", {\n  df <- iris\n\n  set.seed(123)\n  df$ID <- sample.int(4, nrow(df), replace = TRUE) # fake-ID\n\n  set.seed(123)\n  df$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable\n\n  set.seed(123)\n  x <- demean(\n    df,\n    select = c(\"Sepal.Length\", \"Petal.Length\"),\n    by = \"ID\",\n    append = FALSE\n  )\n  expect_snapshot(head(x))\n\n  set.seed(123)\n  expect_message(\n    {\n      x <- demean(\n        df,\n        select = c(\"Sepal.Length\", \"binary\", \"Species\"),\n        by = \"ID\",\n        append = FALSE\n      )\n    },\n    \"have been coerced to numeric\"\n  )\n  expect_snapshot(head(x))\n\n  set.seed(123)\n  expect_message(\n    {\n      y <- demean(\n        df,\n        select = ~ Sepal.Length + binary + Species,\n        by = ~ID,\n        append = FALSE\n      )\n    },\n    \"have been coerced to numeric\"\n  )\n  expect_message(\n    {\n      z <- demean(\n        df,\n        select = c(\"Sepal.Length\", \"binary\", \"Species\"),\n        by = \"ID\",\n        append = FALSE\n      )\n    },\n    \"have been coerced to numeric\"\n  )\n  expect_identical(y, z)\n\n  set.seed(123)\n  x <- demean(df, select = c(\"Sepal.Length\", \"Petal.Length\"), by = \"ID\")\n  expect_named(\n    x,\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"ID\",\n      \"binary\",\n      \"Sepal.Length_between\",\n      \"Petal.Length_between\",\n      \"Sepal.Length_within\",\n      \"Petal.Length_within\"\n    )\n  )\n  expect_snapshot(head(x))\n\n  df$Sepal.Length_within <- df$Sepal.Length\n  expect_error(\n    demean(df, select = c(\"Sepal.Length\", \"Petal.Length\"), by = \"ID\"),\n    regex = \"One or more of\"\n  )\n})\n\ntest_that(\"demean interaction term\", {\n  dat <- data.frame(\n    a = c(1, 2, 3, 4, 1, 2, 3, 4),\n    x = c(4, 3, 3, 4, 1, 2, 1, 2),\n    y = c(1, 2, 1, 2, 4, 3, 2, 1),\n    ID = c(1, 2, 3, 1, 2, 3, 
1, 2)\n  )\n\n  set.seed(123)\n  expect_snapshot(demean(\n    dat,\n    select = c(\"a\", \"x*y\"),\n    by = \"ID\",\n    append = FALSE\n  ))\n})\n\ntest_that(\"demean shows message if some vars don't exist\", {\n  dat <- data.frame(\n    a = c(1, 2, 3, 4, 1, 2, 3, 4),\n    x = c(4, 3, 3, 4, 1, 2, 1, 2),\n    y = c(1, 2, 1, 2, 4, 3, 2, 1),\n    ID = c(1, 2, 3, 1, 2, 3, 1, 2)\n  )\n\n  set.seed(123)\n  expect_error(\n    demean(dat, select = \"foo\", by = \"ID\"),\n    regexp = \"not found\"\n  )\n})\n\n\n# see issue #520\ntest_that(\"demean for cross-classified designs (by > 1)\", {\n  skip_if_not_installed(\"poorman\")\n\n  data(efc, package = \"datawizard\")\n  dat <- na.omit(efc)\n  dat$e42dep <- factor(dat$e42dep)\n  dat$c172code <- factor(dat$c172code)\n\n  x2a <- dat %>%\n    data_group(e42dep) %>%\n    data_modify(\n      c12hour_e42dep = mean(c12hour)\n    ) %>%\n    data_ungroup() %>%\n    data_group(c172code) %>%\n    data_modify(\n      c12hour_c172code = mean(c12hour)\n    ) %>%\n    data_ungroup() %>%\n    data_modify(\n      c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code\n    )\n\n  out <- degroup(\n    dat,\n    select = \"c12hour\",\n    by = c(\"e42dep\", \"c172code\"),\n    suffix_demean = \"_within\"\n  )\n\n  expect_equal(\n    out$c12hour_e42dep_between,\n    x2a$c12hour_e42dep,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$c12hour_within,\n    x2a$c12hour_within,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n\n  x2a <- dat %>%\n    data_group(e42dep) %>%\n    data_modify(\n      c12hour_e42dep = mean(c12hour, na.rm = TRUE),\n      neg_c_7_e42dep = mean(neg_c_7, na.rm = TRUE)\n    ) %>%\n    data_ungroup() %>%\n    data_group(c172code) %>%\n    data_modify(\n      c12hour_c172code = mean(c12hour, na.rm = TRUE),\n      neg_c_7_c172code = mean(neg_c_7, na.rm = TRUE)\n    ) %>%\n    data_ungroup() %>%\n    data_modify(\n      c12hour_within = c12hour - c12hour_e42dep - c12hour_c172code,\n      
neg_c_7_within = neg_c_7 - neg_c_7_e42dep - neg_c_7_c172code\n    )\n\n  out <- degroup(\n    dat,\n    select = c(\"c12hour\", \"neg_c_7\"),\n    by = c(\"e42dep\", \"c172code\"),\n    suffix_demean = \"_within\"\n  )\n\n  expect_equal(\n    out$c12hour_e42dep_between,\n    x2a$c12hour_e42dep,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$neg_c_7_c172code_between,\n    x2a$neg_c_7_c172code,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$neg_c_7_within,\n    x2a$neg_c_7_within,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$c12hour_within,\n    x2a$c12hour_within,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n\n  # More than 2 groupings\n  mu <- 100\n  ul <- setNames(c(-1, -3, 0, 4), nm = letters[1:4])\n  uL <- setNames(c(10, 30, 0, -40), nm = LETTERS[1:4])\n  um <- setNames(c(100, 150, -250), nm = month.abb[1:3])\n\n  dat <- expand.grid(l = letters[1:4], L = LETTERS[1:4], m = month.abb[1:3])\n\n  set.seed(111)\n  e <- rnorm(nrow(dat) - 1) |> round(2)\n  e <- append(e, -sum(e))\n\n  dat$y <- mu + ul[dat$l] + uL[dat$L] + um[dat$m] + e\n  dat$z <- mu + ul[dat$l] + uL[dat$L] + um[dat$m] + 10 * e\n\n  dat_dem <- datawizard::demean(\n    dat,\n    by = c(\"l\", \"L\", \"m\"),\n    select = c(\"y\", \"z\")\n  )\n\n  expect_equal(dat_dem$y_l_between, ave(dat$y, dat$l), ignore_attr = TRUE)\n  expect_equal(dat_dem$y_L_between, ave(dat$y, dat$L), ignore_attr = TRUE)\n  expect_equal(dat_dem$y_m_between, ave(dat$y, dat$m), ignore_attr = TRUE)\n  expect_equal(rowSums(dat_dem[grepl(\"^y_\", colnames(dat_dem))]), dat$y)\n  expect_equal(rowSums(dat_dem[grepl(\"^z_\", colnames(dat_dem))]), dat$z)\n})\n\n\ntest_that(\"demean, sanity checks\", {\n  data(efc, package = \"datawizard\")\n  dat <- na.omit(efc)\n  dat$e42dep <- factor(dat$e42dep)\n  dat$c172code <- factor(dat$c172code)\n\n  expect_error(\n    degroup(\n      dat,\n      select = c(\"c12hour\", \"neg_c_8\"),\n      by = 
c(\"e42dep\", \"c172code\"),\n      suffix_demean = \"_within\"\n    ),\n    regex = \"Variable \\\"neg_c_8\\\" was not found\"\n  )\n  expect_error(\n    degroup(\n      dat,\n      select = c(\"c12hour\", \"neg_c_8\"),\n      by = c(\"e42dep\", \"c173code\"),\n      suffix_demean = \"_within\"\n    ),\n    regex = \"Variables \\\"neg_c_8\\\" and \\\"c173code\\\" were not found\"\n  )\n})\n\n\ntest_that(\"demean for nested designs (by > 1), nested = TRUE\", {\n  data(efc, package = \"datawizard\")\n  dat <- na.omit(efc)\n  dat$e42dep <- factor(dat$e42dep)\n  dat$c172code <- factor(dat$c172code)\n\n  x_ijk <- dat$c12hour\n  xbar_k <- ave(x_ijk, dat$e42dep, FUN = mean)\n  xbar_jk <- ave(x_ijk, dat$e42dep, dat$c172code, FUN = mean)\n\n  L3_between <- xbar_k\n  L2_between <- xbar_jk - xbar_k\n  L1_within <- x_ijk - xbar_jk\n\n  out <- degroup(\n    dat,\n    select = \"c12hour\",\n    by = c(\"e42dep\", \"c172code\"),\n    nested = TRUE,\n    suffix_demean = \"_within\"\n  )\n\n  expect_equal(\n    out$c12hour_within,\n    L1_within,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$c12hour_e42dep_between,\n    L3_between,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    out$c12hour_c172code_between,\n    L2_between,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n\n  # Following #635\n  testdf <- data.frame(\n    roman = c(\"I\", \"I\", \"I\", \"I\", \"II\", \"II\", \"II\", \"II\"),\n    alphabet = c(\"a\", \"a\", \"b\", \"b\", \"a\", \"b\", \"a\", \"b\"),\n    val1 = c(1, 2, 3, 4, 5, 6, 7, 8),\n    val2 = c(1, 2, 3, 4, 5, 6, 7, 8),\n    val3 = c(1, 2, 3, 4, 5, 6, 7, 8)\n  )\n\n  out <- datawizard::demean(\n    testdf,\n    select = c(\"val1\", \"val2\", \"val3\"),\n    by = \"roman/alphabet\",\n    append = FALSE\n  )\n\n  expect_named(\n    out,\n    c(\n      \"val1_roman_between\",\n      \"val1_alphabet_between\",\n      \"val2_roman_between\",\n      \"val2_alphabet_between\",\n      
\"val3_roman_between\",\n      \"val3_alphabet_between\",\n      \"val1_within\",\n      \"val2_within\",\n      \"val3_within\"\n    )\n  )\n\n  expect_equal(\n    as.vector(out$val1_within),\n    c(-0.5, 0.5, -0.5, 0.5, -1, -1, 1, 1)\n  )\n  expect_equal(out$val1_within, out$val2_within)\n  expect_equal(out$val1_within, out$val3_within)\n\n  expect_equal(\n    as.vector(out$val1_roman_between),\n    c(2.5, 2.5, 2.5, 2.5, 6.5, 6.5, 6.5, 6.5)\n  )\n  expect_equal(out$val1_roman_between, out$val2_roman_between)\n  expect_equal(out$val1_roman_between, out$val3_roman_between)\n\n  expect_equal(\n    as.vector(out$val1_alphabet_between),\n    c(-1, -1, 1, 1, -0.5, 0.5, -0.5, 0.5)\n  )\n  expect_equal(out$val1_alphabet_between, out$val2_alphabet_between)\n  expect_equal(out$val1_alphabet_between, out$val3_alphabet_between)\n\n  expect_equal(rowSums(out[, grepl(\"^val1\", names(out))]), testdf$val1)\n})\n"
  },
  {
    "path": "tests/testthat/test-describe_distribution.R",
    "content": "skip_if_not_installed(\"bayestestR\")\n\n# numeric ---------------------------------------\n\ntest_that(\"describe_distribution - numeric: works with basic numeric vector\", {\n  x <- describe_distribution(mtcars$mpg)\n  expect_identical(dim(x), c(1L, 9L))\n  expect_identical(round(x$Mean), 20)\n})\n\ntest_that(\"describe_distribution - numeric: correctly handles missing values\", {\n  no_missing <- describe_distribution(mtcars$mpg)\n  test <- mtcars$mpg\n  test[1] <- NA\n  with_missing <- describe_distribution(test)\n  expect_identical(with_missing$n, 31L)\n  expect_identical(with_missing$n_Missing, 1L)\n  expect_false(with_missing$Mean == no_missing$Mean)\n})\n\ntest_that(\"describe_distribution - numeric: works with quartiles\", {\n  x <- describe_distribution(mtcars$mpg, quartiles = TRUE)\n  expect_identical(dim(x), c(1L, 11L))\n  expect_true(\"Q1\" %in% names(x))\n  expect_true(\"Q3\" %in% names(x))\n})\n\ntest_that(\"describe_distribution - numeric: works with range\", {\n  x <- describe_distribution(mtcars$mpg, range = FALSE)\n  expect_identical(dim(x), c(1L, 7L))\n  expect_false(\"min\" %in% names(x))\n  expect_false(\"max\" %in% names(x))\n})\n\ntest_that(\"describe_distribution - NULL for date\", {\n  v <- as.Date(c(\"2022-01-01\", \"2022-01-02\"))\n  expect_warning(expect_null(describe_distribution(v)))\n})\n\n\n# data frame ---------------------------------------\n\ntest_that(\"describe_distribution - data frame: works with basic data frame\", {\n  x <- describe_distribution(mtcars)\n  expect_identical(dim(x), c(11L, 10L))\n  expect_identical(round(x[1, \"Mean\"]), 20)\n})\n\ntest_that(\"describe_distribution - data frame: correctly handles missing values\", {\n  no_missing <- describe_distribution(mtcars)\n  test <- mtcars\n  test[1, ] <- NA\n  with_missing <- describe_distribution(test)\n  expect_identical(unique(with_missing$n), 31L)\n  expect_identical(unique(with_missing$n_Missing), 1L)\n  expect_false(unique(with_missing$Mean == 
no_missing$Mean))\n})\n\ntest_that(\"describe_distribution - data frame: works with quartiles\", {\n  x <- describe_distribution(mtcars, quartiles = TRUE)\n  expect_identical(dim(x), c(11L, 12L))\n  expect_true(\"Q1\" %in% names(x))\n  expect_true(\"Q3\" %in% names(x))\n})\n\ntest_that(\"describe_distribution - data frame: works with range\", {\n  x <- describe_distribution(mtcars, range = FALSE)\n  expect_identical(dim(x), c(11L, 8L))\n  expect_false(\"min\" %in% names(x))\n  expect_false(\"max\" %in% names(x))\n})\n\n\n# factor ---------------------------------------\n\ntest_that(\"describe_distribution - factor\", {\n  expect_snapshot(describe_distribution(factor(substring(\n    \"statistics\",\n    1:10,\n    1:10\n  ))))\n})\n\n\n# character ---------------------------------------\n\ntest_that(\"describe_distribution - character\", {\n  expect_snapshot(describe_distribution(as.character(ToothGrowth$supp)))\n})\n\n\n# list ---------------------------------------\n\ntest_that(\"describe_distribution - list: works with basic list\", {\n  x <- list(mtcars$mpg, mtcars$cyl)\n  stored <- describe_distribution(x)\n  unnamed <- describe_distribution(list(mtcars$mpg, mtcars$cyl))\n  named <- describe_distribution(list(foo = mtcars$mpg, foo2 = mtcars$cyl))\n  mix <- describe_distribution(list(foo = mtcars$mpg, mtcars$cyl))\n\n  expect_identical(dim(stored), c(2L, 10L))\n  expect_identical(round(stored$Mean), c(20, 6))\n  expect_identical(dim(unnamed), c(2L, 10L))\n  expect_identical(round(unnamed$Mean), c(20, 6))\n  expect_identical(dim(named), c(2L, 10L))\n  expect_identical(round(named$Mean), c(20, 6))\n  expect_identical(dim(mix), c(2L, 10L))\n  expect_identical(round(mix$Mean), c(20, 6))\n})\n\ntest_that(\"describe_distribution - list: works with include_factors\", {\n  x1 <- describe_distribution(list(mtcars$mpg, factor(mtcars$cyl)))\n  y <- describe_distribution(list(mtcars$mpg))\n  expect_identical(x1, y)\n\n  x2 <- describe_distribution(\n    list(mtcars$mpg, 
factor(mtcars$cyl)),\n    include_factors = TRUE\n  )\n  expect_identical(dim(x2), c(2L, 10L))\n  expect_identical(x2$Variable, c(\"mtcars$mpg\", \"factor(mtcars$cyl)\"))\n\n  x3 <- describe_distribution(\n    list(mtcars$mpg, foo = factor(mtcars$cyl)),\n    include_factors = TRUE\n  )\n  expect_identical(dim(x3), c(2L, 10L))\n  expect_identical(x3$Variable, c(\"mtcars$mpg\", \"foo\"))\n})\n\ntest_that(\"describe_distribution - list: correctly removes character elements\", {\n  x <- describe_distribution(list(mtcars$mpg, \"something\"))\n  y <- describe_distribution(list(mtcars$mpg))\n  expect_identical(x, y)\n})\n\ntest_that(\"describe_distribution - list: correctly handles variable names\", {\n  x <- list(mtcars$mpg, mtcars$cyl)\n  stored <- describe_distribution(x)\n  unnamed <- describe_distribution(list(mtcars$mpg, mtcars$cyl))\n  named <- describe_distribution(list(foo = mtcars$mpg, foo2 = mtcars$cyl))\n  mix <- describe_distribution(list(foo = mtcars$mpg, mtcars$cyl))\n\n  expect_identical(stored$Variable, c(\"Var_1\", \"Var_2\"))\n  expect_identical(unnamed$Variable, c(\"mtcars$mpg\", \"mtcars$cyl\"))\n  expect_identical(named$Variable, c(\"foo\", \"foo2\"))\n  expect_identical(mix$Variable, c(\"foo\", \"mtcars$cyl\"))\n})\n\ntest_that(\"describe_distribution - list: correctly handles missing values\", {\n  no_missing <- describe_distribution(list(mtcars$mpg, mtcars$cyl))\n  test <- mtcars$mpg\n  test2 <- mtcars$cyl\n  test[1] <- NA\n  test2[1] <- NA\n  with_missing <- describe_distribution(list(test, test2))\n  expect_identical(unique(with_missing$n), 31L)\n  expect_identical(unique(with_missing$n_Missing), 1L)\n  expect_false(unique(with_missing$Mean == no_missing$Mean))\n})\n\ntest_that(\"describe_distribution - list: works with quartiles\", {\n  x <- describe_distribution(list(mtcars$mpg, mtcars$cyl), quartiles = TRUE)\n  expect_identical(dim(x), c(2L, 12L))\n  expect_true(\"Q1\" %in% names(x))\n  expect_true(\"Q3\" %in% 
names(x))\n})\n\ntest_that(\"describe_distribution - list: works with range\", {\n  x <- describe_distribution(list(mtcars$mpg, mtcars$cyl), range = FALSE)\n  expect_identical(dim(x), c(2L, 8L))\n  expect_false(\"min\" %in% names(x))\n  expect_false(\"max\" %in% names(x))\n})\n\n\n# select ----------------------\n\ntest_that(\"describe_distribution - select\", {\n  data(iris)\n  out <- describe_distribution(iris, select = starts_with(\"Petal\"))\n\n  expect_identical(out$Variable, c(\"Petal.Length\", \"Petal.Width\"))\n  expect_equal(out$Mean, c(3.758000, 1.199333), tolerance = 1e-3)\n\n  expect_null(describe_distribution(iris, select = \"Species\"))\n  out <- describe_distribution(iris, select = \"Species\", include_factors = TRUE)\n  exp <- describe_distribution(iris$Species)\n  expect_identical(out$Range, exp$Range)\n})\n\n\n# select and grouped df ----------------------\n\ntest_that(\"describe_distribution - grouped df\", {\n  data(iris)\n  x <- data_group(iris, Species)\n  out <- describe_distribution(x, select = starts_with(\"Petal\"))\n\n  expect_snapshot(out)\n  expect_equal(\n    out$Mean,\n    c(1.462, 0.246, 4.26, 1.326, 5.552, 2.026),\n    tolerance = 1e-3\n  )\n})\n\n# Mostly to test printing\ntest_that(\"describe_distribution - grouped df and multiple groups\", {\n  x <- data.frame(\n    grp1 = rep(letters[1:3], each = 20),\n    grp2 = rep(letters[1:3], 20),\n    values = 1:30\n  )\n  x <- data_group(x, c(\"grp1\", \"grp2\"))\n  expect_snapshot(describe_distribution(x))\n})\n\ntest_that(\"argument 'by' works\", {\n  # basic\n  grouped <- data_group(mtcars, c(\"am\", \"vs\"))\n  expect_identical(\n    describe_distribution(grouped),\n    describe_distribution(mtcars, by = c(\"am\", \"vs\")),\n    ignore_attr = TRUE\n  )\n\n  # mixing data_group() and arg 'by'\n  grouped <- data_group(mtcars, c(\"am\", \"vs\"))\n  half_grouped <- data_group(mtcars, \"am\")\n  expect_identical(\n    describe_distribution(grouped),\n    describe_distribution(half_grouped, 
by = \"vs\"),\n    ignore_attr = TRUE\n  )\n\n  expect_error(\n    describe_distribution(mtcars, by = 2),\n    \"must be a character vector\"\n  )\n})\n\ntest_that(\"empty groups are discarded from the output, #608\", {\n  dat <- data.frame(\n    grp1 = factor(\"a\", levels = c(\"a\", \"b\")),\n    grp2 = factor(c(\"A\", \"B\")),\n    value = 1:2\n  )\n  dat <- data_group(dat, c(\"grp1\", \"grp2\"))\n  expect_no_error(\n    suppressWarnings(describe_distribution(dat, ci = 0.95))\n  )\n})\n\n# distribution_mode --------------------------\ntest_that(\"distribution_mode works as expected\", {\n  # atomic vector\n  expect_identical(distribution_mode(c(1, 2, 3, 3, 4, 5)), 3)\n  expect_identical(distribution_mode(c(1, 2, 3, 3, 4, 4, 5)), 3)\n  expect_identical(distribution_mode(c(1.5, 2.3, 3.7, 3.7, 4.0, 5)), 3.7)\n\n  # list\n  expect_identical(distribution_mode(list(1, 2, 3, 3, 4, 5)), list(3))\n\n  # scalar\n  expect_identical(distribution_mode(\"a\"), \"a\")\n\n  # empty\n  expect_null(distribution_mode(NULL))\n})\n\n# select helpers ------------------------------\ntest_that(\"describe_distribution regex\", {\n  expect_equal(\n    describe_distribution(mtcars, select = \"pg\", regex = TRUE),\n    describe_distribution(mtcars, select = \"mpg\"),\n    ignore_attr = TRUE\n  )\n})\n\n# formatting ------------------------------\ntest_that(\"describe_distribution formatting\", {\n  data(iris)\n  x <- describe_distribution(iris$Sepal.Width, quartiles = TRUE)\n  expect_snapshot(format(x))\n})\n\n# other -----------------------------------\ntest_that(\"return NA in CI if sample is too sparse\", {\n  set.seed(123456)\n  expect_silent(expect_message(\n    {\n      res <- describe_distribution(\n        mtcars[mtcars$cyl == \"6\", ],\n        wt,\n        centrality = \"map\",\n        ci = 0.95\n      )\n    },\n    regex = \"Bootstrapping\"\n  ))\n  expect_equal(res$CI_low_map, 2.6462, tolerance = 1e-2)\n  expect_equal(res$CI_high_map, 3.4531, tolerance = 1e-2)\n\n  x <- c(\n  
  2.5,\n    2.5,\n    2.5,\n    2.5,\n    2.5,\n    2.5,\n    2.2,\n    2.2,\n    2.2,\n    2.5,\n    2.5,\n    2.5,\n    2.5,\n    2.5,\n    2.5\n  )\n  expect_message(\n    {\n      out <- describe_distribution(x, centrality = \"map\")\n    },\n    regex = \"Could not calculate\"\n  )\n  expect_identical(out$MAP, NA_real_)\n  expect_silent(describe_distribution(x, centrality = \"map\", verbose = FALSE))\n})\n\n# check for reserved column names\ntest_that(\"errors on invalid column names (reserved word)\", {\n  data(mtcars)\n\n  out <- data_to_long(\n    mtcars,\n    cols = 1:3,\n    names_to = \"Variable\",\n    values_to = \"Values\"\n  )\n  out <- data_group(out, c(\"gear\", \"Variable\"))\n  expect_error(\n    describe_distribution(out, select = \"Values\"),\n    regex = \"Following variable names are reserved\"\n  )\n\n  out <- data_to_long(\n    mtcars,\n    cols = 1:3,\n    names_to = \"Variable\",\n    values_to = \"Values\"\n  )\n  expect_error(\n    describe_distribution(out, select = \"Variable\"),\n    regex = \"Following variable names are reserved\"\n  )\n})\n\n# multiple centralities\ntest_that(\"multiple centralities work\", {\n  data(iris)\n  out <- describe_distribution(\n    iris,\n    select = \"Petal.Width\",\n    centrality = c(\"median\", \"mean\")\n  )\n  expect_identical(dim(out), c(1L, 12L))\n  expect_named(\n    out,\n    c(\n      \"Variable\",\n      \"Median\",\n      \"MAD\",\n      \"Mean\",\n      \"SD\",\n      \"IQR\",\n      \"Min\",\n      \"Max\",\n      \"Skewness\",\n      \"Kurtosis\",\n      \"n\",\n      \"n_Missing\"\n    )\n  )\n  out <- describe_distribution(\n    iris,\n    select = \"Petal.Width\",\n    centrality = list(\"median\", \"mean\")\n  )\n  expect_identical(dim(out), c(1L, 12L))\n  expect_named(\n    out,\n    c(\n      \"Variable\",\n      \"Median\",\n      \"MAD\",\n      \"Mean\",\n      \"SD\",\n      \"IQR\",\n      \"Min\",\n      \"Max\",\n      \"Skewness\",\n      \"Kurtosis\",\n      \"n\",\n     
 \"n_Missing\"\n    )\n  )\n})\n\n\ntest_that(\"(multiple) centralities with CIs\", {\n  data(iris)\n  x <- iris$Sepal.Width\n  set.seed(123456)\n  expect_message(\n    {\n      out <- describe_distribution(\n        x,\n        centrality = \"all\",\n        ci = 0.95,\n        iterations = 100\n      )\n    },\n    regex = \"For more stable intervals\"\n  )\n  expect_named(\n    out,\n    c(\n      \"Median\",\n      \"MAD\",\n      \"Mean\",\n      \"SD\",\n      \"MAP\",\n      \"IQR\",\n      \"CI_low_mean\",\n      \"CI_high_mean\",\n      \"CI_low_median\",\n      \"CI_high_median\",\n      \"CI_low_MAP\",\n      \"CI_high_MAP\",\n      \"Min\",\n      \"Max\",\n      \"Skewness\",\n      \"Kurtosis\",\n      \"n\",\n      \"n_Missing\"\n    )\n  )\n  expect_snapshot(print(out, table_width = Inf))\n  expect_silent(describe_distribution(\n    x,\n    centrality = \"all\",\n    ci = 0.95,\n    iterations = 100,\n    verbose = FALSE\n  ))\n\n  set.seed(123456)\n  out <- describe_distribution(\n    x,\n    centrality = \"mean\",\n    ci = 0.95,\n    iterations = 100,\n    verbose = FALSE\n  )\n  expect_named(\n    out,\n    c(\n      \"Mean\",\n      \"SD\",\n      \"IQR\",\n      \"CI_low_mean\",\n      \"CI_high_mean\",\n      \"Min\",\n      \"Max\",\n      \"Skewness\",\n      \"Kurtosis\",\n      \"n\",\n      \"n_Missing\"\n    )\n  )\n  expect_snapshot(print(out, table_width = Inf))\n\n  set.seed(123456)\n  out <- describe_distribution(\n    x,\n    centrality = c(\"MAP\", \"median\"),\n    ci = 0.95,\n    iterations = 100,\n    verbose = FALSE\n  )\n  expect_named(\n    out,\n    c(\n      \"Median\",\n      \"MAD\",\n      \"MAP\",\n      \"IQR\",\n      \"CI_low_MAP\",\n      \"CI_high_MAP\",\n      \"CI_low_median\",\n      \"CI_high_median\",\n      \"Min\",\n      \"Max\",\n      \"Skewness\",\n      \"Kurtosis\",\n      \"n\",\n      \"n_Missing\"\n    )\n  )\n  expect_snapshot(print(out, table_width = Inf))\n\n  # only one message for data frame\n 
 expect_silent(expect_message(describe_distribution(iris, ci = 0.95)))\n})\n\n\ntest_that(\"display() method exports to markdown\", {\n  skip_if_not_installed(\"knitr\")\n  data(iris)\n  out <- describe_distribution(iris)\n  expect_error(display(out, format = \"invalid\"), regex = \"Invalid option\")\n  expect_snapshot(display(out))\n})\n\n\ntest_that(\"display() method exports to tinytable\", {\n  skip_if_not_installed(\"tinytable\")\n  data(iris)\n  out <- describe_distribution(iris)\n  expect_snapshot(display(out, format = \"tt\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-distributions.R",
    "content": "test_that(\"distributions\", {\n  skip_if_not_installed(\"bayestestR\")\n  skip_if_not_installed(\"parameters\")\n\n  set.seed(123)\n  x <- bayestestR::distribution_normal(100)\n\n  expect_equal(kurtosis(x)$Kurtosis, -0.1119534, tolerance = 0.01)\n  expect_equal(skewness(x)$Skewness, -5.881466e-17, tolerance = 0.01)\n  expect_equal(as.numeric(smoothness(x, \"diff\")), 1.183699, tolerance = 0.01)\n  expect_equal(as.numeric(smoothness(x, \"cor\")), 0.9979799, tolerance = 0.01)\n})\n"
  },
  {
    "path": "tests/testthat/test-empty-dataframe.R",
    "content": "test_that(\"remove empty with character\", {\n  tmp <- data.frame(\n    a = c(1, 2, 3, NA, 5),\n    b = c(1, NA, 3, NA, 5),\n    c = c(NA, NA, NA, NA, NA),\n    d = c(1, NA, 3, NA, 5)\n  )\n\n  expect_identical(empty_columns(tmp), c(c = 3L))\n  expect_identical(empty_rows(tmp), 4L)\n\n  expect_identical(dim(remove_empty_columns(tmp)), c(5L, 3L))\n  expect_identical(dim(remove_empty_rows(tmp)), c(4L, 4L))\n  expect_identical(dim(remove_empty(tmp)), c(4L, 3L))\n\n  expect_snapshot(remove_empty_columns(tmp))\n  expect_snapshot(remove_empty_rows(tmp))\n  expect_snapshot(remove_empty(tmp))\n})\n\n\ntest_that(\"remove empty columns with character\", {\n  tmp <- data.frame(\n    a = c(1, 2, 3, NA, 5),\n    b = c(\"\", NA, \"\", NA, \"\"),\n    c = c(NA, NA, NA, NA, NA),\n    d = c(1, NA, 3, NA, 5),\n    e = c(\"\", \"\", \"\", \"\", \"\"),\n    stringsAsFactors = FALSE\n  )\n\n  expect_identical(empty_columns(tmp), c(b = 2L, c = 3L, e = 5L))\n  expect_identical(dim(remove_empty_columns(tmp)), c(5L, 2L))\n  expect_identical(dim(remove_empty(tmp)), c(4L, 2L))\n})\n\n\ntest_that(\"remove empty rows with character\", {\n  tmp <- data.frame(\n    a = c(1, \"\", 3, NA, 5),\n    b = c(\"\", NA, \"\", NA, \"\"),\n    c = c(NA, NA, NA, NA, NA),\n    d = c(1, NA, 3, NA, 5),\n    e = c(\"\", \"\", \"\", \"\", \"\"),\n    f = factor(c(\"\", \"\", \"\", \"\", \"\")),\n    g = factor(c(\"\", NA, \"\", NA, \"\")),\n    stringsAsFactors = FALSE\n  )\n\n  expect_identical(empty_rows(tmp), c(2L, 4L))\n  expect_identical(dim(remove_empty_rows(tmp)), c(3L, 7L))\n  expect_identical(dim(remove_empty(tmp)), c(3L, 2L))\n})\n\ntest_that(\"empty_columns with only NA characters\", {\n  tmp <- data.frame(\n    var1 = c(1, 1, 1),\n    var2 = c(NA_character_, NA_character_, NA_character_)\n  )\n  expect_identical(empty_columns(tmp), c(var2 = 2L))\n})\n\n\ntest_that(\"works with non-ascii chars\", {\n  tmp <- data.frame(\n    a = c(1, 2, 3, NA, 5),\n    b = c(\"\", NA, \"\", NA, 
\"\"),\n    c = c(NA, NA, NA, NA, NA),\n    d = c(\"test\", \"Se\\x96ora\", \"works fine\", \"this too\", \"yeah\"),\n    e = c(\"\", \"\", \"\", \"\", \"\"),\n    stringsAsFactors = FALSE\n  )\n  expect_identical(empty_columns(tmp), c(b = 2L, c = 3L, e = 5L))\n})\n"
  },
  {
    "path": "tests/testthat/test-extract_column_names.R",
    "content": "test_that(\"extract_column_names works as expected\", {\n  expect_identical(\n    extract_column_names(iris, starts_with(\"Sepal\")),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, starts_with(\"Sepal\", \"Petal\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, ends_with(\"Width\")),\n    c(\"Sepal.Width\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, ends_with(\"Length\", \"Width\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, regex(\"\\\\.\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, c(\"Petal.Width\", \"Sepal.Length\")),\n    c(\"Petal.Width\", \"Sepal.Length\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, contains(\"Wid\")),\n    c(\"Sepal.Width\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, contains(\"en\", \"idt\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n\n  expect_identical(\n    extract_column_names(mtcars, c(\"am\", \"gear\", \"cyl\")),\n    c(\"am\", \"gear\", \"cyl\")\n  )\n\n  expect_identical(\n    extract_column_names(mtcars, c(\"vam\", \"gear\", \"cyl\")),\n    c(\"gear\", \"cyl\")\n  )\n\n  expect_warning(expect_null(extract_column_names(mtcars, ends_with(\"abc\"))))\n\n  expect_identical(\n    extract_column_names(mtcars, regex(\"rb$\")),\n    \"carb\"\n  )\n\n  expect_identical(\n    extract_column_names(mtcars, regex(\"^c\")),\n    c(\"cyl\", \"carb\")\n  )\n\n  expect_warning(expect_null(extract_column_names(mtcars, \"^c\")))\n\n  expect_identical(\n    extract_column_names(mtcars, regex(\"^C\"), ignore_case = TRUE),\n    c(\"cyl\", \"carb\")\n  )\n\n  expect_identical(\n    
extract_column_names(iris, \"Width$\", regex = TRUE),\n    c(\"Sepal.Width\", \"Petal.Width\")\n  )\n})\n\n\ntest_that(\"extract_column_names from other functions\", {\n  test_fun1 <- function(data, i) {\n    extract_column_names(data, select = i)\n  }\n  expect_identical(\n    test_fun1(iris, c(\"Sepal.Length\", \"Sepal.Width\")),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  expect_identical(\n    test_fun1(iris, starts_with(\"Sep\")),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  test_fun1a <- function(data, i) {\n    extract_column_names(data, select = i, regex = TRUE)\n  }\n  expect_identical(\n    test_fun1a(iris, \"Sep\"),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  test_fun1b <- function(data, i) {\n    extract_column_names(data, select = i, regex = TRUE)\n  }\n  expect_identical(\n    test_fun1b(iris, \"Width$\"),\n    c(\"Sepal.Width\", \"Petal.Width\")\n  )\n\n  test_fun2 <- function(data) {\n    extract_column_names(data, select = starts_with(\"Sep\"))\n  }\n  expect_identical(\n    test_fun2(iris),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n\n  test_fun3 <- function(data) {\n    i <- \"Sep\"\n    extract_column_names(data, select = starts_with(i))\n  }\n  expect_identical(\n    test_fun3(iris),\n    c(\"Sepal.Length\", \"Sepal.Width\")\n  )\n})\n\ntest_that(\"extract_column_names regex\", {\n  expect_identical(\n    extract_column_names(mtcars, select = \"pg\", regex = TRUE),\n    extract_column_names(mtcars, select = \"mpg\")\n  )\n})\n\ntest_that(\"extract_column_names works correctly with minus sign\", {\n  expect_identical(\n    extract_column_names(iris, -\"Sepal.Length\"),\n    c(\"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, -c(\"Sepal.Length\", \"Petal.Width\")),\n    c(\"Sepal.Width\", \"Petal.Length\", \"Species\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, -1),\n    c(\"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", 
\"Species\")\n  )\n\n  expect_error(\n    extract_column_names(iris, -1:2),\n    regexp = \"can't mix negative\"\n  )\n\n  expect_identical(\n    extract_column_names(iris, -(1:2)),\n    c(\"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, -c(1, 3)),\n    c(\"Sepal.Width\", \"Petal.Width\", \"Species\")\n  )\n\n  expect_identical(\n    extract_column_names(iris, -starts_with(\"Sepal\", \"Petal\")),\n    \"Species\"\n  )\n\n  expect_identical(\n    extract_column_names(iris, -ends_with(\"Length\", \"Width\")),\n    \"Species\"\n  )\n\n  expect_identical(\n    extract_column_names(iris, -contains(\"en\", \"idt\")),\n    \"Species\"\n  )\n\n  expect_identical(\n    extract_column_names(\n      iris,\n      -c(\"Sepal.Length\", \"Petal.Width\"),\n      exclude = \"Species\"\n    ),\n    c(\"Sepal.Width\", \"Petal.Length\")\n  )\n})\n\ntest_that(\"extract_column_names with square brackets\", {\n  expect_identical(\n    extract_column_names(mtcars, select = names(mtcars)[-1]),\n    extract_column_names(mtcars, select = 2:11)\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-labelled_data.R",
    "content": "data(efc, package = \"datawizard\")\n\n# reverse -----------------------------------\n\ntest_that(\"reverse, labels preserved\", {\n  # factor, label\n  expect_identical(\n    attr(reverse(efc$e42dep), \"label\", exact = TRUE),\n    \"elder's dependency\"\n  )\n  # factor, labels\n  expect_named(\n    attr(reverse(efc$e42dep), \"labels\", exact = TRUE),\n    names(attr(efc$e42dep, \"labels\", exact = TRUE))\n  )\n  expect_equal(\n    attr(reverse(efc$e42dep), \"labels\", exact = TRUE),\n    rev(attr(efc$e42dep, \"labels\", exact = TRUE)),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_named(\n    attr(reverse(efc$c12hour), \"labels\", exact = TRUE),\n    names(attr(efc$c12hour, \"labels\", exact = TRUE))\n  )\n  # data frame\n  labels <- sapply(reverse(efc), attr, which = \"label\", exact = TRUE)\n  expect_identical(\n    labels,\n    c(\n      c12hour = \"average number of hours of care per week\",\n      e16sex = \"elder's gender\",\n      e42dep = \"elder's dependency\",\n      c172code = \"carer's level of education\",\n      neg_c_7 = \"Negative impact with 7 items\"\n    )\n  )\n})\n\n\n# data_merge -----------------------------------\n\ntest_that(\"data_merge, labels preserved\", {\n  labels <- sapply(\n    data_merge(efc[1:2], efc[3:4], verbose = FALSE),\n    attr,\n    which = \"label\",\n    exact = TRUE\n  )\n  expect_identical(\n    labels,\n    c(\n      c12hour = \"average number of hours of care per week\",\n      e16sex = \"elder's gender\",\n      e42dep = \"elder's dependency\",\n      c172code = \"carer's level of education\"\n    )\n  )\n})\n\n\n# data_extract -----------------------------------\n\ntest_that(\"data_extract, labels preserved\", {\n  # factor\n  expect_equal(\n    attr(data_extract(efc, select = \"e42dep\"), \"labels\", exact = TRUE),\n    attr(efc$e42dep, \"labels\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(data_extract(efc, select = \"c172code\"), \"labels\", exact 
= TRUE),\n    attr(efc$c172code, \"labels\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # data frame\n  labels <- sapply(\n    data_extract(efc, select = c(\"e42dep\", \"c172code\")),\n    attr,\n    which = \"label\",\n    exact = TRUE\n  )\n  expect_identical(\n    labels,\n    c(e42dep = \"elder's dependency\", c172code = \"carer's level of education\")\n  )\n})\n\n\n# categorize -----------------------------------\n\ntest_that(\"categorize, labels preserved\", {\n  # factor\n  expect_equal(\n    attr(categorize(efc$e42dep), \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(categorize(efc$c12hour), \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_reorder -----------------------------------\n\ntest_that(\"data_reorder, labels preserved\", {\n  expect_equal(\n    attr(data_reorder(efc, \"e42dep\")[[1]], \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_remove -----------------------------------\n\ntest_that(\"data_remove, labels preserved\", {\n  expect_equal(\n    attr(data_remove(efc, \"e42dep\")[[1]], \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_rename -----------------------------------\n\ntest_that(\"data_rename, labels preserved\", {\n  # factor\n  x <- data_rename(efc, \"e42dep\", \"dependency\")\n  expect_equal(\n    attr(x$dependency, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  x <- data_rename(efc, \"c12hour\", \"careload\")\n  expect_equal(\n    attr(x$careload, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # data frame\n  labels <- sapply(\n    data_remove(efc, starts_with(\"c1\")),\n    attr,\n    which = 
\"label\",\n    exact = TRUE\n  )\n  expect_identical(\n    labels,\n    c(\n      e16sex = \"elder's gender\",\n      e42dep = \"elder's dependency\",\n      neg_c_7 = \"Negative impact with 7 items\"\n    )\n  )\n})\n\n\n# data_addprefix -----------------------------------\n\ntest_that(\"data_addprefix, labels preserved\", {\n  x <- data_addprefix(efc, \"new_\")\n  # factor\n  expect_equal(\n    attr(x$new_e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(x$new_c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_addsuffix -----------------------------------\n\ntest_that(\"data_addsuffix, labels preserved\", {\n  x <- data_addsuffix(efc, \"_new\")\n  # factor\n  expect_equal(\n    attr(x$e42dep_new, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(x$c12hour_new, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# to_numeric -----------------------------------\n\ntest_that(\"to_numeric, labels preserved\", {\n  x <- to_numeric(efc, dummy_factors = FALSE)\n  # factor\n  expect_equal(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(x$c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n\n  x <- to_numeric(efc, dummy_factors = TRUE)\n  # numeric\n  expect_equal(\n    attr(x$c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_match -----------------------------------\n\ntest_that(\"data_match, labels preserved\", {\n  x <- data_match(efc, data.frame(c172code = 1, e16sex = 2), match = \"or\")\n  
# factor\n  expect_equal(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(x$c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # filtered\n  expect_equal(\n    attr(x$c172code, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    attr(x$c172code, \"labels\", exact = TRUE),\n    attr(efc$c172code, \"labels\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# data_filter -----------------------------------\n\ntest_that(\"data_filter, labels preserved\", {\n  x <- data_filter(efc, c172code == 1 & c12hour > 40)\n  # factor\n  expect_identical(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE)\n  )\n  # numeric\n  expect_identical(\n    attr(x$c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE)\n  )\n})\n\n\n# convert_to_na -----------------------------------\n\ntest_that(\"convert_to_na, labels preserved\", {\n  expect_message({\n    x <- convert_to_na(efc, na = c(2, \"2\"), select = starts_with(\"e\"))\n  })\n  # factor\n  expect_equal(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  expect_equal(\n    attr(x$e16sex, \"label\", exact = TRUE),\n    attr(efc$e16sex, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n\n  # factor\n  x <- convert_to_na(efc$e42dep, na = \"2\")\n  expect_equal(\n    attr(x, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # numeric\n  x <- convert_to_na(efc$e16sex, na = 2)\n  expect_equal(\n    attr(x, \"label\", exact = TRUE),\n    attr(efc$e16sex, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  # drop unused value labels\n  x <- 
convert_to_na(efc$c172code, na = 2)\n  expect_identical(\n    attr(x, \"labels\", exact = TRUE),\n    c(`low level of education` = 1, `high level of education` = 3)\n  )\n})\n\n\n# data_select -----------------------------------\n\ntest_that(\"data_select, labels preserved\", {\n  x <- data_select(efc, starts_with(\"c\"))\n  # numeric\n  expect_equal(\n    attr(x$c12hour, \"label\", exact = TRUE),\n    attr(efc$c12hour, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n\n  x <- data_select(efc, starts_with(\"e\"))\n  # factor\n  expect_equal(\n    attr(x$e42dep, \"label\", exact = TRUE),\n    attr(efc$e42dep, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# recode_values -----------------------------------\n\ntest_that(\"recode_values, labels preserved\", {\n  options(data_recode_pattern = NULL)\n  data(efc)\n  x <- recode_values(efc$c172code, recode = list(`0` = 1:2, `1` = 3))\n  expect_equal(\n    attr(x, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  expect_null(attr(x, \"labels\", exact = TRUE))\n})\n\n\n# slide -----------------------------------\n\ntest_that(\"slide, labels preserved\", {\n  data(efc)\n  suppressMessages({\n    x <- slide(efc)\n  })\n  expect_equal(\n    attr(x$c172code, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  expect_null(attr(x$c172code, \"labels\", exact = TRUE))\n\n  x <- slide(efc$c172code)\n  expect_equal(\n    attr(x, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n\n\n# to_factor -----------------------------------\n\ntest_that(\"to_factor, labels preserved\", {\n  data(efc)\n  x <- to_factor(efc)\n  expect_equal(\n    attr(x$c172code, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n  expect_null(attr(x$c172code, \"labels\", exact = TRUE))\n\n  x <- 
to_factor(efc$c172code)\n  expect_equal(\n    attr(x, \"label\", exact = TRUE),\n    attr(efc$c172code, \"label\", exact = TRUE),\n    ignore_attr = TRUE\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-labels_to_levels.R",
    "content": "test_that(\"labels_to_levels, numeric\", {\n  expect_message(\n    labels_to_levels(1:10),\n    regex = \"only works\"\n  )\n})\n\ntest_that(\"labels_to_levels, factor\", {\n  data(efc)\n  x <- as.factor(efc$c172code)\n  attr(x, \"labels\") <- c(low = 1, mid = 2, high = 3)\n  x <- labels_to_levels(x)\n  expect_identical(levels(x), c(\"low\", \"mid\", \"high\"))\n  expect_equal(table(x), table(efc$c172code), ignore_attr = TRUE)\n\n  x <- as.ordered(efc$c172code)\n  attr(x, \"labels\") <- c(low = 1, mid = 2, high = 3)\n  x <- labels_to_levels(x)\n  expect_identical(levels(x), c(\"low\", \"mid\", \"high\"))\n  expect_s3_class(x, \"ordered\")\n})\n\ntest_that(\"labels_to_levels, factor, error on no labels\", {\n  data(efc)\n  data(iris)\n  x <- as.factor(efc$c172code)\n  expect_error(labels_to_levels(x), regex = \"Could not change factor\")\n  expect_error(labels_to_levels(iris), regex = \"Could not change factor\")\n})\n\ntest_that(\"labels_to_levels, data frame, append\", {\n  data(efc)\n  out <- labels_to_levels(efc, append = \"_ll\")\n  expect_named(\n    out,\n    c(\"c12hour\", \"e16sex\", \"e42dep\", \"c172code\", \"neg_c_7\", \"e42dep_ll\")\n  )\n})\n\ntest_that(\"labels_to_levels, data frame, append\", {\n  data(iris)\n  d <- as.data.frame(lapply(iris, as.factor))\n  expect_identical(labels_to_levels(d), d)\n})\n\ntest_that(\"labels_to_levels, factor, data frame\", {\n  data(efc)\n  out <- labels_to_levels(efc)\n  expect_identical(\n    levels(out$e42dep),\n    c(\n      \"independent\",\n      \"slightly dependent\",\n      \"moderately dependent\",\n      \"severely dependent\"\n    )\n  )\n  expect_identical(sum(vapply(efc, is.factor, TRUE)), 1L)\n})\n\ntest_that(\"labels_to_levels, factor, with random value numbers (no sequential order)\", {\n  x <- c(5, 5, 1, 3, 1, 7)\n  attr(x, \"labels\") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)\n  out <- to_factor(x, labels_to_levels = TRUE)\n  expect_identical(\n    as.character(out),\n    
c(\"don't know\", \"don't know\", \"yes\", \"maybe\", \"yes\", \"no\")\n  )\n  expect_identical(levels(out), c(\"yes\", \"maybe\", \"don't know\", \"no\"))\n\n  x <- c(4, 4, 1, 2, 1, 3)\n  attr(x, \"labels\") <- c(a = 1, b = 2, c = 3, d = 4)\n  out <- to_factor(x, labels_to_levels = TRUE)\n  expect_identical(as.character(out), c(\"d\", \"d\", \"a\", \"b\", \"a\", \"c\"))\n  expect_identical(levels(out), c(\"a\", \"b\", \"c\", \"d\"))\n\n  x <- c(4, 4, 1, 2, 1, 3)\n  attr(x, \"labels\") <- c(d = 1, c = 2, b = 3, a = 4)\n  out <- to_factor(x, labels_to_levels = TRUE)\n  expect_identical(as.character(out), c(\"a\", \"a\", \"d\", \"c\", \"d\", \"b\"))\n  expect_identical(levels(out), c(\"d\", \"c\", \"b\", \"a\"))\n\n  x <- c(5, 5, 1, 3, 1, 7)\n  attr(x, \"labels\") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)\n  out <- to_factor(x, labels_to_levels = TRUE)\n  expect_identical(\n    out,\n    structure(\n      c(3L, 3L, 1L, 2L, 1L, 4L),\n      levels = c(\"yes\", \"maybe\", \"don't know\", \"no\"),\n      class = \"factor\"\n    )\n  )\n  expect_identical(\n    as.character(out),\n    c(\"don't know\", \"don't know\", \"yes\", \"maybe\", \"yes\", \"no\")\n  )\n\n  x <- c(5, 5, 1, 3, 1, 7, 4)\n  attr(x, \"labels\") <- c(no = 7, yes = 1, maybe = 3, `don't know` = 5)\n  expect_message(\n    {\n      out <- to_factor(x, labels_to_levels = TRUE)\n    },\n    regex = \"Not all factor levels\"\n  )\n  expect_identical(\n    out,\n    structure(\n      c(4L, 4L, 1L, 2L, 1L, 5L, 3L),\n      levels = c(\"yes\", \"maybe\", \"4\", \"don't know\", \"no\"),\n      class = \"factor\"\n    )\n  )\n  expect_identical(\n    as.character(out),\n    c(\"don't know\", \"don't know\", \"yes\", \"maybe\", \"yes\", \"no\", \"4\")\n  )\n\n  x <- c(5, 5, 1, 3, 1, 7)\n  attr(x, \"labels\") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)\n  expect_message({\n    out <- to_factor(x, labels_to_levels = TRUE)\n  })\n  expect_identical(\n    out,\n    structure(\n      c(3L, 3L, 1L, 2L, 1L, 
4L),\n      levels = c(\"yes\", \"3\", \"don't know\", \"no\"),\n      class = \"factor\"\n    )\n  )\n  expect_identical(\n    as.character(out),\n    c(\"don't know\", \"don't know\", \"yes\", \"3\", \"yes\", \"no\")\n  )\n\n  x <- c(5, 5, 1, 3, 1, 7, 6)\n  attr(x, \"labels\") <- c(no = 7, yes = 1, maybe = 4, `don't know` = 5)\n  expect_message({\n    out <- to_factor(x, labels_to_levels = TRUE)\n  })\n  expect_identical(\n    out,\n    structure(\n      c(3L, 3L, 1L, 2L, 1L, 5L, 4L),\n      levels = c(\"yes\", \"3\", \"don't know\", \"6\", \"no\"),\n      class = \"factor\"\n    )\n  )\n  expect_identical(\n    as.character(out),\n    c(\"don't know\", \"don't know\", \"yes\", \"3\", \"yes\", \"no\", \"6\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-makepredictcall.R",
    "content": "test_that(\"makepredictcall\", {\n  data(\"mtcars\")\n  nd <- data.frame(hp = c(100, 200))\n\n  m1 <- lm(mpg ~ scale(hp, scale = FALSE), mtcars)\n  m2 <- lm(mpg ~ center(hp), mtcars)\n\n  m3 <- lm(mpg ~ scale(hp), mtcars)\n  m4 <- lm(mpg ~ standardize(hp), mtcars)\n\n  p1 <- predict(m1, nd)\n  expect_equal(p1, predict(m2, nd))\n  expect_equal(p1, predict(m3, nd))\n  expect_equal(p1, predict(m4, nd))\n\n  X <- matrix(rnorm(100), ncol = 2)\n  Y <- rnorm(50)\n  expect_error(lm(Y ~ standardize(X)), \"matrices\")\n})\n\n\ntest_that(\"makepredictcall, normalize\", {\n  data(\"mtcars\")\n  m1 <- lm(mpg ~ normalize(hp), data = mtcars)\n  m2 <- lm(mpg ~ hp, data = mtcars)\n  m3 <- lm(mpg ~ normalize(hp, include_bounds = FALSE), data = mtcars)\n\n  out1 <- predict(m1, newdata = data.frame(hp = c(100, 110, 120)))\n  out2 <- predict(m2, newdata = data.frame(hp = c(100, 110, 120)))\n  out3 <- predict(m3, newdata = data.frame(hp = c(100, 110, 120)))\n\n  expect_equal(out1, out2, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out1, out3, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out2, out3, tolerance = 1e-3, ignore_attr = TRUE)\n\n  out1 <- predict(m1, newdata = data.frame(hp = 110))\n  out2 <- predict(m2, newdata = data.frame(hp = 110))\n  out3 <- predict(m3, newdata = data.frame(hp = 110))\n\n  expect_equal(out1, out2, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out1, out3, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out2, out3, tolerance = 1e-3, ignore_attr = TRUE)\n})\n\n\ntest_that(\"makepredictcall, rescale\", {\n  data(\"mtcars\")\n  m1 <- lm(mpg ~ rescale(hp, to = c(50, 80)), data = mtcars)\n  m2 <- lm(mpg ~ hp, data = mtcars)\n  m3 <- lm(mpg ~ rescale(hp), data = mtcars)\n\n  out1 <- predict(m1, newdata = data.frame(hp = c(100, 110, 120)))\n  out2 <- predict(m2, newdata = data.frame(hp = c(100, 110, 120)))\n  out3 <- predict(m3, newdata = data.frame(hp = c(100, 110, 120)))\n\n  expect_equal(out1, out2, tolerance = 
1e-3, ignore_attr = TRUE)\n  expect_equal(out1, out3, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out2, out3, tolerance = 1e-3, ignore_attr = TRUE)\n\n  out1 <- predict(m1, newdata = data.frame(hp = 110))\n  out2 <- predict(m2, newdata = data.frame(hp = 110))\n  out3 <- predict(m3, newdata = data.frame(hp = 110))\n\n  expect_equal(out1, out2, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out1, out3, tolerance = 1e-3, ignore_attr = TRUE)\n  expect_equal(out2, out3, tolerance = 1e-3, ignore_attr = TRUE)\n})\n"
  },
  {
    "path": "tests/testthat/test-mean_sd.R",
    "content": "test_that(\"mean_sd\", {\n  x <- c(-1, 0, 1)\n  msd <- mean_sd(x)\n  expect_identical(unname(msd), x)\n  expect_named(msd, c(\"-SD\", \"Mean\", \"+SD\"))\n\n  msd <- mean_sd(mtcars[[\"mpg\"]])\n  mmad <- median_mad(mtcars[[\"mpg\"]])\n  expect_identical(\n    unname(msd),\n    mean(mtcars[[\"mpg\"]]) + c(-1, 0, 1) * sd(mtcars[[\"mpg\"]])\n  )\n  expect_identical(\n    unname(mmad),\n    median(mtcars[[\"mpg\"]]) + c(-1, 0, 1) * mad(mtcars[[\"mpg\"]])\n  )\n\n  msd2 <- mean_sd(mtcars[[\"mpg\"]], times = 3L)\n  expect_length(msd2, n = 3 * 2 + 1)\n  expect_identical(unname(msd2[3:5]), unname(msd))\n  expect_equal(\n    unname(diff(msd2)),\n    rep(sd(mtcars[[\"mpg\"]]), 6),\n    tolerance = 0.00001\n  )\n  expect_named(\n    msd2,\n    c(\"-3 SD\", \"-2 SD\", \"-1 SD\", \"Mean\", \"+1 SD\", \"+2 SD\", \"+3 SD\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-means_by_group.R",
    "content": "test_that(\"mean_by_group\", {\n  skip_if_not_installed(\"emmeans\")\n  data(efc)\n  expect_snapshot(means_by_group(efc, \"c12hour\", \"e42dep\"))\n  expect_snapshot(means_by_group(efc, \"c12hour\", \"e42dep\", ci = 0.99))\n  expect_snapshot(means_by_group(efc, \"c12hour\", \"e42dep\", ci = NA))\n  expect_snapshot(means_by_group(efc, c(\"neg_c_7\", \"c12hour\"), \"e42dep\"))\n  expect_snapshot(means_by_group(\n    efc,\n    c(\"neg_c_7\", \"c12hour\"),\n    \"e42dep\",\n    ci = NA\n  ))\n  expect_snapshot(means_by_group(\n    efc,\n    c(\"neg_c_7\", \"c12hour\"),\n    \"e42dep\",\n    ci = 0.99\n  ))\n  expect_snapshot(means_by_group(efc$c12hour, efc$e42dep))\n  expect_snapshot(means_by_group(efc$c12hour, efc$e42dep, ci = NA))\n})\n\ntest_that(\"mean_by_group, weighted\", {\n  skip_if_not_installed(\"emmeans\")\n  data(efc)\n  set.seed(123)\n  efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))\n  expect_snapshot(\n    means_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\"),\n    variant = \"windows\"\n  )\n  expect_snapshot(\n    means_by_group(efc, \"c12hour\", \"e42dep\", weights = \"weight\", ci = NA),\n    variant = \"windows\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-normalize.R",
    "content": "test_that(\"normalize work as expected\", {\n  expect_equal(\n    normalize(c(0, 1, 5, -5, -2)),\n    c(0.5, 0.6, 1, 0, 0.3),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    normalize(c(0, 1, 5, -5, -2), include_bounds = FALSE),\n    c(0.5, 0.58, 0.9, 0.1, 0.34),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    normalize(c(0, 1, 5, -5, -2), include_bounds = 0.01),\n    c(0.5, 0.598, 0.99, 0.01, 0.304),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n\n  expect_equal(\n    normalize(c(0, 1, 5, -5, -2), include_bounds = \"a\", verbose = FALSE),\n    c(0.5, 0.6, 1, 0, 0.3),\n    ignore_attr = TRUE,\n    tolerance = 1e-4\n  )\n\n  expect_warning(normalize(\n    c(0, 1, 5, -5, -2),\n    include_bounds = \"a\",\n    verbose = TRUE\n  ))\n\n  expect_snapshot(head(normalize(trees)))\n})\n\n\ntest_that(\"normalize: only NAs\", {\n  expect_equal(\n    normalize(c(NA_real_, NA_real_)),\n    c(NA_real_, NA_real_),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"normalize: with Inf\", {\n  expect_equal(\n    normalize(c(1, 2, 3, NA, Inf)),\n    c(0, 0.5, 1, NA, Inf),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"normalize: with Inf\", {\n  expect_equal(\n    normalize(c(1, 2, 3, -Inf, Inf)),\n    c(0, 0.5, 1, -Inf, Inf),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"normalize: all Inf\", {\n  expect_equal(\n    normalize(c(-Inf, Inf)),\n    c(-Inf, Inf),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"normalize: all Na or Inf\", {\n  expect_equal(\n    normalize(c(NA, -Inf, NA, Inf)),\n    c(NA, -Inf, NA, Inf),\n    ignore_attr = TRUE\n  )\n})\n\n\ntest_that(\"normalize: only one value\", {\n  foo <- 1\n  expect_warning(\n    normalize(x = foo),\n    regexp = \"Variable `foo` contains only one unique value and will\"\n  )\n  expect_warning(\n    {\n      y <- normalize(x = 12)\n    },\n    regexp = \"Variable `12` contains only one unique value and will\"\n  )\n  expect_equal(y, 12, ignore_attr = TRUE)\n\n  expect_silent(normalize(x = 
foo, verbose = FALSE))\n  expect_equal(normalize(x = foo, verbose = FALSE), 1, ignore_attr = TRUE)\n})\n\ntest_that(\"normalize: only two values\", {\n  expect_warning({\n    y <- normalize(x = c(1, 2))\n  })\n  expect_equal(y, c(0, 1), ignore_attr = TRUE)\n\n  expect_silent(normalize(x = c(1, 2), verbose = FALSE))\n  expect_equal(\n    normalize(x = c(1, 2), verbose = FALSE),\n    c(0, 1),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"normalize: factor\", {\n  expect_identical(\n    normalize(factor(1:3)),\n    factor(1:3)\n  )\n})\n\ntest_that(\"normalize: matrix\", {\n  expect_equal(\n    normalize(matrix(1:4, ncol = 2)),\n    matrix(seq(0, 1, by = 0.3333), ncol = 2),\n    tolerance = 1e-3\n  )\n})\n\ntest_that(\"normalize: select\", {\n  skip_if_not_installed(\"poorman\")\n\n  expect_equal(\n    normalize(\n      iris,\n      select = starts_with(\"Petal\\\\.L\")\n    ) %>%\n      poorman::pull(Petal.Length),\n    normalize(iris$Petal.Length),\n    ignore_attr = TRUE\n  )\n})\n\ntest_that(\"normalize: exclude\", {\n  skip_if_not_installed(\"poorman\")\n\n  expect_identical(\n    normalize(\n      iris,\n      exclude = ends_with(\"ecies\")\n    ),\n    iris %>%\n      normalize(select = 1:4)\n  )\n})\n\ntest_that(\"normalize, with append\", {\n  out_n <- normalize(iris, \"Sepal.Width\", append = TRUE)\n  manual <- (iris$Sepal.Width - min(iris$Sepal.Width)) /\n    diff(range(iris$Sepal.Width))\n  expect_equal(out_n$Sepal.Width_n, manual, ignore_attr = TRUE)\n})\n\n\n# with grouped data -------------------------------------------\n\ntest_that(\"normalize (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    normalize(Sepal.Width) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  manual <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(\n      Sepal.Width = (Sepal.Width - min(Sepal.Width)) / diff(range(Sepal.Width))\n    ) %>%\n    poorman::ungroup() 
%>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard, manual)\n})\n\ntest_that(\"normalize (grouped data), with append\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard_n <- iris %>%\n    poorman::group_by(Species) %>%\n    normalize(Sepal.Width, append = TRUE) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width_n)\n\n  manual_n <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(\n      Sepal.Width = (Sepal.Width - min(Sepal.Width)) / diff(range(Sepal.Width))\n    ) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard_n, manual_n)\n})\n\ntest_that(\"normalize, include bounds (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    normalize(Sepal.Width, include_bounds = TRUE) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  manual <- iris %>%\n    poorman::group_by(Species) %>%\n    poorman::mutate(\n      Sepal.Width = (Sepal.Width - min(Sepal.Width)) / diff(range(Sepal.Width))\n    ) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Sepal.Width)\n\n  expect_identical(datawizard, manual)\n})\n\n\ntest_that(\"normalize, factor (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  datawizard <- iris %>%\n    poorman::group_by(Species) %>%\n    normalize(Species) %>%\n    poorman::ungroup() %>%\n    poorman::pull(Species)\n\n  manual <- iris$Species\n\n  expect_identical(datawizard, manual)\n})\n\n# select helpers ------------------------------\ntest_that(\"normalize regex\", {\n  expect_identical(\n    normalize(mtcars, select = \"pg\", regex = TRUE),\n    normalize(mtcars, select = \"mpg\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-print.dw_transformer.R",
    "content": "test_that(\"print.dw_transformer\", {\n  data(iris)\n  expect_snapshot(rescale(iris$Sepal.Length))\n  expect_snapshot(normalize(iris$Sepal.Length))\n  expect_snapshot(center(iris$Sepal.Length))\n  expect_snapshot(standardize(iris$Sepal.Length))\n})\n"
  },
  {
    "path": "tests/testthat/test-ranktransform.R",
    "content": "test_that(\"ranktransform works with NAs\", {\n  x <- c(NA_real_, NA_real_)\n  expect_identical(ranktransform(x), x)\n})\n\ntest_that(\"ranktransform works with factors\", {\n  x <- factor(c(\"apple\", \"bear\", \"banana\", \"dear\"))\n  expect_identical(ranktransform(x), x)\n})\n\ntest_that(\"ranktransform works with unique value vectors\", {\n  x <- c(1L, 1L, 1L)\n\n  expect_identical(suppressWarnings(ranktransform(x)), x)\n\n  expect_warning(\n    ranktransform(x),\n    \"Variable `x` contains only one unique value and will not be normalized.\"\n  )\n})\n\ntest_that(\"ranktransform works with two unique value vectors\", {\n  x <- c(1L, 1L, 1L, 2L, 2L, 2L)\n\n  expect_identical(suppressWarnings(ranktransform(x)), c(2, 2, 2, 5, 5, 5))\n\n  expect_warning(\n    ranktransform(x),\n    \"Consider converting it\"\n  )\n})\n\ntest_that(\"signed rank works as expected\", {\n  x <- c(-1, 2, -3, 4)\n\n  sr <- ranktransform(x, sign = TRUE)\n  r <- ranktransform(x, sign = FALSE)\n\n  expect_identical(sr, x) # unchanged\n  expect_identical(r, c(2, 3, 1, 4))\n\n  x <- c(1, -2, -2, 4, 0, 3, -14, 0)\n  expect_warning(ranktransform(x, sign = TRUE))\n  expect_true(all(is.na(suppressWarnings(\n    ranktransform(x, sign = TRUE)[c(5, 8)]\n  ))))\n})\n\ntest_that(\"argument 'zeros' works\", {\n  x <- c(-1, 0, 2, -3, 4)\n  expect_warning(\n    ranktransform(x, sign = TRUE),\n    \"cannot be sign-rank\"\n  )\n  expect_identical(\n    ranktransform(x, sign = TRUE, zeros = \"signrank\"),\n    c(-2, 0, 3, -4, 5)\n  )\n  expect_error(\n    ranktransform(x, sign = TRUE, zeros = \"foo\"),\n    \"should be one of\"\n  )\n})\n\ntest_that(\"ranktransform works with data frames\", {\n  set.seed(123)\n  expect_snapshot(ranktransform(BOD))\n})\n\n\n# with grouped data -------------------------------------------\n\ntest_that(\"ranktransform works with data frames (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  set.seed(123)\n  value1 <- sample.int(20, 9, replace = 
TRUE)\n  set.seed(456)\n  value2 <- sample.int(20, 9, replace = TRUE)\n\n  test_df <- data.frame(\n    id = rep(c(\"A\", \"B\", \"C\"), each = 3),\n    value1 = value1,\n    value2 = value2,\n    stringsAsFactors = FALSE\n  )\n\n  # nolint start: nested_pipe_linter\n  expect_identical(\n    test_df %>%\n      poorman::group_by(id) %>%\n      ranktransform(exclude = \"id\") %>%\n      poorman::ungroup(),\n    data.frame(\n      id = rep(c(\"A\", \"B\", \"C\"), each = 3),\n      value1 = c(2, 3, 1, 1, 2, 3, 2, 1, 3),\n      value2 = c(3, 2, 1, 1, 3, 2, 2, 3, 1),\n      stringsAsFactors = FALSE\n    )\n  )\n  # nolint end\n})\n\n\ntest_that(\"ranktransform works with data frames containing NAs (grouped data)\", {\n  skip_if_not_installed(\"poorman\")\n\n  set.seed(789)\n  value1 <- sample(c(1:15, NA), 9, replace = TRUE)\n  set.seed(10)\n  value2 <- sample(c(1:15, NA), 9, replace = TRUE)\n\n  test_df <- data.frame(\n    id = rep(c(\"A\", \"B\", \"C\"), each = 3),\n    value1 = value1,\n    value2 = value2,\n    stringsAsFactors = FALSE\n  )\n\n  # nolint start: nested_pipe_linter\n  expect_identical(\n    test_df %>%\n      poorman::group_by(id) %>%\n      ranktransform(exclude = \"id\") %>%\n      poorman::ungroup(),\n    data.frame(\n      id = rep(c(\"A\", \"B\", \"C\"), each = 3),\n      value1 = c(2, NA, 1, 1, 3, 2, 2, NA, 1),\n      value2 = c(3, 1, 2, NA, 2, 1, 3, 1, 2),\n      stringsAsFactors = FALSE\n    )\n  )\n  # nolint end\n})\n\n# select helpers ------------------------------\ntest_that(\"ranktransform regex\", {\n  expect_identical(\n    ranktransform(mtcars, select = \"pg\", regex = TRUE),\n    ranktransform(mtcars, select = \"mpg\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-recode_into.R",
    "content": "test_that(\"recode_into\", {\n  x <- 1:10\n  out <- recode_into(\n    x > 5 ~ \"a\",\n    x > 2 & x <= 5 ~ \"b\",\n    default = \"c\"\n  )\n  expect_identical(out, c(\"c\", \"c\", \"b\", \"b\", \"b\", \"a\", \"a\", \"a\", \"a\", \"a\"))\n})\n\ntest_that(\"recode_into, overwrite\", {\n  x <- 1:30\n  expect_warning(\n    recode_into(\n      x > 1 ~ \"a\",\n      x > 10 & x <= 15 ~ \"b\",\n      default = \"c\",\n      overwrite = TRUE\n    ),\n    regex = \"overwritten\"\n  )\n  # validate results\n  x <- 1:10\n  expect_silent({\n    out <- recode_into(\n      x >= 3 & x <= 7 ~ 1,\n      x > 5 ~ 2,\n      default = 0,\n      verbose = FALSE\n    )\n  })\n  expect_identical(out, c(0, 0, 1, 1, 1, 2, 2, 2, 2, 2))\n  expect_warning(\n    recode_into(\n      x >= 3 & x <= 7 ~ 1,\n      x > 5 ~ 2,\n      default = 0\n    ),\n    regex = \"case 6\"\n  )\n\n  x <- 1:10\n  expect_silent({\n    out <- recode_into(\n      x >= 3 & x <= 7 ~ 1,\n      x > 5 ~ 2,\n      default = 0,\n      overwrite = FALSE,\n      verbose = FALSE\n    )\n  })\n  expect_identical(out, c(0, 0, 1, 1, 1, 1, 1, 2, 2, 2))\n  expect_warning(\n    recode_into(\n      x >= 3 & x <= 7 ~ 1,\n      x > 5 ~ 2,\n      default = 0,\n      overwrite = FALSE\n    ),\n    regex = \"case 6\"\n  )\n})\n\ntest_that(\"recode_into, don't overwrite\", {\n  x <- 1:30\n  expect_warning(\n    recode_into(\n      x > 1 ~ \"a\",\n      x > 10 & x <= 15 ~ \"b\",\n      default = \"c\",\n      overwrite = FALSE\n    ),\n    regex = \"altered\"\n  )\n})\n\ntest_that(\"recode_into, check mixed types\", {\n  x <- 1:10\n  expect_error(\n    {\n      out <- recode_into(\n        x > 5 ~ 1,\n        x > 2 & x <= 5 ~ \"b\"\n      )\n    },\n    regexp = \"Recoding not carried out\"\n  )\n})\n\ntest_that(\"recode_into, complain about default = NULL\", {\n  x <- 1:10\n  expect_warning(\n    {\n      out <- recode_into(\n        x > 5 ~ \"c\",\n        x > 2 & x <= 5 ~ \"b\",\n        default = NULL\n      )\n    },\n   
 regexp = \"Default value\"\n  )\n  expect_identical(out, c(NA, NA, \"b\", \"b\", \"b\", \"c\", \"c\", \"c\", \"c\", \"c\"))\n})\n\ntest_that(\"recode_into, data frame\", {\n  data(mtcars)\n  out <- recode_into(\n    mtcars$mpg > 20 & mtcars$cyl == 6 ~ 1,\n    mtcars$mpg <= 20 ~ 2,\n    default = 0\n  )\n  expect_identical(\n    out,\n    c(\n      1,\n      1,\n      0,\n      1,\n      2,\n      2,\n      2,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      0\n    )\n  )\n  d <- mtcars\n  out <- recode_into(\n    mpg > 20 & cyl == 6 ~ 1,\n    mpg <= 20 ~ 2,\n    default = 0,\n    data = d\n  )\n  expect_identical(\n    out,\n    c(\n      1,\n      1,\n      0,\n      1,\n      2,\n      2,\n      2,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      0\n    )\n  )\n})\n\ntest_that(\"recode_into, works inside functions\", {\n  test <- function() {\n    set.seed(123)\n    d <- data.frame(\n      x = sample.int(5, 30, TRUE),\n      y = sample(letters[1:5], 30, TRUE),\n      stringsAsFactors = FALSE\n    )\n    recode_into(\n      x %in% 1:3 & y %in% c(\"a\", \"b\") ~ 1,\n      x > 3 ~ 2,\n      data = d,\n      default = 0\n    )\n  }\n  expect_identical(\n    test(),\n    c(\n      1,\n      1,\n      1,\n      0,\n      0,\n      2,\n      2,\n      0,\n      1,\n      1,\n      2,\n      0,\n      0,\n      0,\n      2,\n      1,\n      1,\n      2,\n      1,\n      0,\n      1,\n      1,\n      0,\n      2,\n      0,\n      1,\n      2,\n      2,\n      1,\n      2\n    )\n  )\n})\n\ntest_that(\"recode_into, check different input length\", {\n  x <- 1:10\n  y <- 10:30\n  
expect_error(\n    {\n      out <- recode_into(\n        x > 5 ~ 1,\n        y > 10 ~ 2\n      )\n    },\n    regexp = \"matching conditions\"\n  )\n})\n\ntest_that(\"recode_into, warn if several patterns match\", {\n  x <- 1:5\n  y <- c(5, 2, 3, 1, 4)\n  expect_warning(\n    {\n      out <- recode_into(\n        x == 2 ~ 1,\n        y == 2 & x == 2 ~ 2,\n        default = 0\n      )\n    },\n    regexp = \"Several recode patterns\"\n  )\n})\n\ntest_that(\"recode_into, make sure recode works with missing in original variable\", {\n  data(mtcars)\n  mtcars$mpg[c(3, 10, 12, 15, 16)] <- NA\n  mtcars$cyl[c(2, 15, 16)] <- NA\n  d_recode_na <<- as.data.frame(mtcars)\n  out1_recoded_na <- recode_into(\n    d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,\n    d_recode_na$mpg <= 20 ~ 2,\n    d_recode_na$cyl == 4 ~ 3,\n    default = 0,\n    preserve_na = TRUE\n  )\n  out2_recoded_na <- recode_into(\n    d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,\n    d_recode_na$mpg <= 20 ~ 2,\n    default = 0,\n    preserve_na = TRUE\n  )\n  expect_message(\n    {\n      out3_recoded_na <- recode_into(\n        d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,\n        d_recode_na$mpg <= 20 ~ 2,\n        d_recode_na$cyl == 4 ~ 3,\n        default = 0,\n        preserve_na = FALSE\n      )\n    },\n    regex = \"Missing values in original variable\"\n  )\n  expect_message(\n    {\n      out4_recoded_na <- recode_into(\n        d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,\n        d_recode_na$mpg <= 20 ~ 2,\n        default = 0,\n        preserve_na = FALSE\n      )\n    },\n    regex = \"Missing values in original variable\"\n  )\n  # one NA in mpg is overwritten by valid value from cyl, total 5 NA\n  expect_identical(\n    out1_recoded_na,\n    c(\n      1,\n      NA,\n      3,\n      1,\n      2,\n      2,\n      2,\n      3,\n      3,\n      NA,\n      2,\n      NA,\n      2,\n      2,\n      NA,\n      NA,\n      2,\n      3,\n      3,\n      3,\n      3,\n      2,\n      2,\n    
  2,\n      2,\n      3,\n      3,\n      3,\n      2,\n      2,\n      2,\n      3\n    )\n  )\n  # total 6 NA\n  expect_identical(\n    out2_recoded_na,\n    c(\n      1,\n      NA,\n      NA,\n      1,\n      2,\n      2,\n      2,\n      0,\n      0,\n      NA,\n      2,\n      NA,\n      2,\n      2,\n      NA,\n      NA,\n      2,\n      0,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      0\n    )\n  )\n  # NA is preserved, set to default if not overwritten by other recodes\n  expect_identical(\n    out3_recoded_na,\n    c(\n      1,\n      0,\n      3,\n      1,\n      2,\n      2,\n      2,\n      3,\n      3,\n      0,\n      2,\n      0,\n      2,\n      2,\n      0,\n      0,\n      2,\n      3,\n      3,\n      3,\n      3,\n      2,\n      2,\n      2,\n      2,\n      3,\n      3,\n      3,\n      2,\n      2,\n      2,\n      3\n    )\n  )\n  expect_identical(\n    out4_recoded_na,\n    c(\n      1,\n      0,\n      0,\n      1,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      2,\n      0,\n      2,\n      2,\n      0,\n      0,\n      2,\n      0,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      2,\n      0,\n      0,\n      0,\n      2,\n      2,\n      2,\n      0\n    )\n  )\n})\n\ntest_that(\"recode_into, NA doesn't need to be of exact type\", {\n  data(mtcars)\n  x1 <- recode_into(\n    mpg > 10 ~ 1,\n    gear == 5 ~ NA_real_,\n    data = mtcars,\n    verbose = FALSE\n  )\n  x2 <- recode_into(\n    mpg > 10 ~ 1,\n    gear == 5 ~ NA,\n    data = mtcars,\n    verbose = FALSE\n  )\n  expect_identical(x1, x2)\n})\n"
  },
  {
    "path": "tests/testthat/test-replace_nan_inf.R",
    "content": "test_that(\"extract from data frame\", {\n  x <- c(1, 2, NA, 3, NaN, 4, NA, 5, Inf, -Inf, 6, 7)\n\n  expect_identical(\n    replace_nan_inf(x),\n    c(1, 2, NA, 3, NA, 4, NA, 5, NA, NA, 6, 7)\n  )\n\n  # a data frame\n  df <- data.frame(\n    x = c(1, NA, 5, Inf, 2, NA),\n    y = c(3, NaN, 4, -Inf, 6, 7),\n    stringsAsFactors = FALSE\n  )\n\n  expect_identical(\n    replace_nan_inf(df),\n    structure(\n      list(\n        x = c(1, NA, 5, NA, 2, NA),\n        y = c(3, NA, 4, NA, 6, 7)\n      ),\n      row.names = c(NA, -6L),\n      class = \"data.frame\"\n    )\n  )\n\n  expect_identical(\n    replace_nan_inf(df, select = starts_with(\"x\")),\n    structure(\n      list(\n        x = c(1, NA, 5, NA, 2, NA),\n        y = c(3, NaN, 4, -Inf, 6, 7)\n      ),\n      row.names = c(NA, -6L),\n      class = \"data.frame\"\n    )\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-rescale_weights.R",
    "content": "test_that(\"rescale_weights works as expected\", {\n  data(nhanes_sample)\n  # convert tibble into data frame, so check-hard GHA works\n  nhanes_sample <- as.data.frame(nhanes_sample)\n\n  expect_snapshot(head(rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\")))\n\n  expect_snapshot(head(rescale_weights(\n    nhanes_sample,\n    \"WTINT2YR\",\n    c(\"SDMVSTRA\", \"SDMVPSU\")\n  )))\n\n  expect_snapshot(head(rescale_weights(\n    nhanes_sample,\n    probability_weights = \"WTINT2YR\",\n    method = \"kish\"\n  )))\n\n  out <- rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\")\n  expect_equal(sum(out$rescaled_weights_a), 2992, tolerance = 1e-3)\n  expect_equal(sum(out$rescaled_weights_b), 2244.71451, tolerance = 1e-3)\n  out <- rescale_weights(nhanes_sample, \"WTINT2YR\", method = \"kish\")\n  expect_equal(sum(out$rescaled_weights), 2162.53961, tolerance = 1e-3)\n  out <- rescale_weights(\n    nhanes_sample,\n    \"WTINT2YR\",\n    by = \"SDMVPSU\",\n    method = \"kish\"\n  )\n  expect_equal(sum(out$rescaled_weights), 2163.3657, tolerance = 1e-3)\n})\n\n\ntest_that(\"rescale_weights works as expected\", {\n  data(nhanes_sample)\n  # convert tibble into data frame, so check-hard GHA works\n  nhanes_sample <- as.data.frame(nhanes_sample)[1:20, ]\n\n  # add NAs\n  set.seed(123)\n  nhanes_sample$WTINT2YR[sample.int(nrow(nhanes_sample), 5)] <- NA\n\n  expect_snapshot(rescale_weights(nhanes_sample, \"WTINT2YR\", \"SDMVSTRA\"))\n  expect_snapshot(rescale_weights(nhanes_sample, \"WTINT2YR\", method = \"kish\"))\n})\n\n\ntest_that(\"rescale_weights nested works as expected\", {\n  data(nhanes_sample)\n  # convert tibble into data frame, so check-hard GHA works\n  nhanes_sample <- as.data.frame(nhanes_sample)\n\n  expect_snapshot(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = c(\"SDMVSTRA\", \"SDMVPSU\"),\n      probability_weights = \"WTINT2YR\",\n      nest = TRUE\n    )\n  )\n\n  expect_warning(\n    {\n      
x <- rescale_weights(\n        data = head(nhanes_sample),\n        by = \"SDMVPSU\",\n        probability_weights = \"WTINT2YR\",\n        nest = TRUE\n      )\n    },\n    \"Only one group variable selected\"\n  )\n\n  expect_identical(\n    x,\n    rescale_weights(\n      data = head(nhanes_sample),\n      by = \"SDMVPSU\",\n      probability_weights = \"WTINT2YR\"\n    )\n  )\n})\n\n\ntest_that(\"rescale_weights errors and warnings\", {\n  data(nhanes_sample)\n  expect_error(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = c(\"a\", \"SDMVSTRA\", \"c\"),\n      probability_weights = \"WTINT2YR\"\n    ),\n    regex = \"The following\"\n  )\n  expect_error(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = \"SDMVSTRA\",\n      probability_weights = NULL\n    ),\n    regex = \"is missing, but required\"\n  )\n  expect_error(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = NULL,\n      probability_weights = \"WTINT2YR\"\n    ),\n    regex = \"must be specified\"\n  )\n  expect_error(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = \"abc\",\n      probability_weights = \"WTINT2YR\",\n      method = \"kish\"\n    ),\n    regex = \"The following variable\"\n  )\n  expect_warning(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      by = \"SDMVSTRA\",\n      probability_weights = \"WTINT2YR\",\n      nest = TRUE,\n      method = \"kish\"\n    ),\n    regex = \"is ignored\"\n  )\n  expect_error(\n    rescale_weights(\n      data = head(nhanes_sample, n = 30),\n      probability_weights = \"WTINT2YR\",\n      method = \"dish\"\n    ),\n    regex = \"Invalid option for argument\"\n  )\n\n  nhanes_sample$rescaled_weights_a <- 1\n  expect_warning(\n    {\n      out <- rescale_weights(\n        data = head(nhanes_sample, n = 30),\n        by = \"SDMVSTRA\",\n        probability_weights = \"WTINT2YR\"\n      )\n    },\n    regex = \"The variable 
name\"\n  )\n  expect_named(\n    out,\n    c(\n      \"total\",\n      \"age\",\n      \"RIAGENDR\",\n      \"RIDRETH1\",\n      \"SDMVPSU\",\n      \"SDMVSTRA\",\n      \"WTINT2YR\",\n      \"rescaled_weights_a\",\n      \"rescaled_weights_a_1\",\n      \"rescaled_weights_b\"\n    )\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-reshape_ci.R",
    "content": "test_that(\"reshape_ci with single CI level\", {\n  df <- data.frame(\n    Parameter = c(\"Term 1\"),\n    CI = c(0.8),\n    CI_low = c(0.2),\n    CI_high = c(0.5),\n    stringsAsFactors = FALSE\n  )\n\n  df_reshape <- reshape_ci(df)\n\n  expect_snapshot(df_reshape)\n})\n\n\ntest_that(\"reshape_ci with multiple CI levels\", {\n  x <- data.frame(\n    Parameter = c(\"Term 1\", \"Term 2\", \"Term 1\", \"Term 2\"),\n    CI = c(0.8, 0.8, 0.9, 0.9),\n    CI_low = c(0.2, 0.3, 0.1, 0.15),\n    CI_high = c(0.5, 0.6, 0.8, 0.85),\n    stringsAsFactors = FALSE\n  )\n\n  expect_snapshot(reshape_ci(x))\n  expect_snapshot(reshape_ci(reshape_ci(x)))\n})\n"
  },
  {
    "path": "tests/testthat/test-row_count.R",
    "content": "test_that(\"row_count\", {\n  d_mn <- data.frame(\n    c1 = c(1, 2, NA, 4),\n    c2 = c(NA, 2, NA, 5),\n    c3 = c(NA, 4, NA, NA),\n    c4 = c(2, 3, 7, 8)\n  )\n  expect_identical(row_count(d_mn, count = 2), c(1, 2, 0, 0))\n  expect_identical(row_count(d_mn, count = NA), c(2, 0, 3, 1))\n  d_mn <- data.frame(\n    c1 = c(\"a\", \"b\", NA, \"c\"),\n    c2 = c(NA, \"b\", NA, \"d\"),\n    c3 = c(NA, 4, NA, NA),\n    c4 = c(2, 3, 7, Inf),\n    stringsAsFactors = FALSE\n  )\n  expect_identical(row_count(d_mn, count = \"b\"), c(0, 2, 0, 0))\n  expect_identical(row_count(d_mn, count = Inf), c(0, 0, 0, 1))\n})\n\ntest_that(\"row_count, errors or messages\", {\n  data(iris)\n  expect_error(\n    expect_warning(row_count(iris, select = \"abc\")),\n    regex = \"must be a valid\"\n  )\n  expect_error(\n    expect_warning(row_count(iris, select = \"abc\", count = 3)),\n    regex = \"No columns\"\n  )\n  expect_error(row_count(iris[1], count = 3), regex = \"with at least\")\n  expect_error(\n    row_count(iris[-seq_len(nrow(iris)), , drop = FALSE], count = 2),\n    regex = \"one row\"\n  )\n})\n\ntest_that(\"row_count, allow_coercion match\", {\n  d_mn <- data.frame(\n    c1 = c(\"1\", \"2\", NA, \"3\"),\n    c2 = c(NA, \"2\", NA, \"3\"),\n    c3 = c(NA, 4, NA, NA),\n    c4 = c(2, 3, 7, Inf),\n    stringsAsFactors = FALSE\n  )\n  expect_identical(\n    row_count(d_mn, count = 2, allow_coercion = TRUE),\n    c(1, 2, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = 2, allow_coercion = FALSE),\n    c(1, 0, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = \"2\", allow_coercion = FALSE),\n    c(0, 2, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = factor(\"2\"), allow_coercion = TRUE),\n    c(1, 2, 0, 0)\n  )\n  expect_error(\n    row_count(d_mn, count = factor(\"2\"), allow_coercion = FALSE),\n    regex = \"No column has\"\n  )\n\n  # mix character / factor\n  d_mn <- data.frame(\n    c1 = factor(c(\"1\", \"2\", NA, \"3\")),\n    
c2 = c(\"2\", \"1\", NA, \"3\"),\n    c3 = c(NA, 4, NA, NA),\n    c4 = c(2, 3, 7, Inf),\n    stringsAsFactors = FALSE\n  )\n  expect_identical(\n    row_count(d_mn, count = 2, allow_coercion = TRUE),\n    c(2, 1, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = 2, allow_coercion = FALSE),\n    c(1, 0, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = \"2\", allow_coercion = FALSE),\n    c(1, 0, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = \"2\", allow_coercion = TRUE),\n    c(2, 1, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = factor(\"2\"), allow_coercion = FALSE),\n    c(0, 1, 0, 0)\n  )\n  expect_identical(\n    row_count(d_mn, count = factor(\"2\"), allow_coercion = TRUE),\n    c(2, 1, 0, 0)\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-row_means.R",
    "content": "test_that(\"row_means/sums\", {\n  d_mn <- data.frame(\n    c1 = c(1, 2, NA, 4),\n    c2 = c(NA, 2, NA, 5),\n    c3 = c(NA, 4, NA, NA),\n    c4 = c(2, 3, 7, 8)\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 4),\n    c(NA, 2.75, NA, NA),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 3),\n    c(NA, 2.75, NA, 5.66667),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 2),\n    c(1.5, 2.75, NA, 5.66667),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 1),\n    c(1.5, 2.75, 7, 5.66667),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 0.5),\n    c(1.5, 2.75, NA, 5.66667),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 0.75),\n    c(NA, 2.75, NA, 5.66667),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_means(d_mn, min_valid = 2, digits = 1),\n    c(1.5, 2.8, NA, 5.7),\n    tolerance = 1e-1\n  )\n  expect_message(row_means(iris), regex = \"Only numeric\")\n  expect_equal(\n    row_means(iris, verbose = FALSE),\n    rowMeans(iris[, 1:4]),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_equal(\n    row_sums(d_mn, min_valid = 4),\n    c(NA, 11, NA, NA),\n    tolerance = 1e-3\n  )\n  expect_equal(\n    row_sums(d_mn, min_valid = 3),\n    c(NA, 11, NA, 17),\n    tolerance = 1e-3\n  )\n  expect_message(row_sums(iris), regex = \"Only numeric\")\n})\n\ntest_that(\"row_means/sums, errors or messages\", {\n  data(iris)\n  expect_error(\n    expect_warning(row_means(iris, select = \"abc\")),\n    regex = \"No columns\"\n  )\n  expect_error(\n    expect_warning(row_sums(iris, select = \"abc\")),\n    regex = \"No columns\"\n  )\n  expect_error(row_means(iris[1], min_valid = 1), regex = \"two numeric\")\n  expect_error(row_means(iris, min_valid = 1:4), regex = \"numeric value\")\n  expect_error(row_means(iris, min_valid = \"a\"), regex = \"numeric value\")\n  expect_message(row_means(iris[1:3, ], 
min_valid = 3), regex = \"Only numeric\")\n  expect_silent(row_means(iris[1:3, ], min_valid = 3, verbose = FALSE))\n  expect_error(row_sums(iris[1], min_valid = 1), regex = \"two numeric\")\n  expect_message(row_sums(iris[1:3, ], min_valid = 3), regex = \"Only numeric\")\n  expect_silent(row_sums(iris[1:3, ], min_valid = 3, verbose = FALSE))\n})\n"
  },
  {
    "path": "tests/testthat/test-select_nse.R",
    "content": "foo <- function(data, select = NULL, exclude = NULL, regex = FALSE) {\n  .select_nse(\n    select,\n    data,\n    exclude = exclude,\n    regex = regex,\n    ignore_case = FALSE\n  )\n}\n\ntest_that(\".select_nse needs data\", {\n  expect_error(\n    foo(select = \"Sepal.Length\", data = NULL),\n    regexp = \"must be provided\"\n  )\n})\n\ntest_that(\".select_nse needs a data frame or something coercible to a dataframe\", {\n  expect_identical(\n    foo(select = \"Sepal.Length\", data = as.matrix(head(iris))),\n    \"Sepal.Length\"\n  )\n  expect_error(\n    foo(select = \"Sepal.Length\", data = list(1:3, 1:2)),\n    regexp = \"must be a data frame\"\n  )\n})\n\ntest_that(\".select_nse: arg 'select' works\", {\n  expect_identical(\n    foo(iris, select = NULL),\n    names(iris)\n  )\n  expect_identical(\n    foo(iris, Petal.Length),\n    \"Petal.Length\"\n  )\n  expect_identical(\n    foo(iris, c(\"Petal.Length\", \"Sepal.Width\")),\n    c(\"Petal.Length\", \"Sepal.Width\")\n  )\n  expect_identical(\n    foo(iris, c(3, 2)),\n    c(\"Petal.Length\", \"Sepal.Width\")\n  )\n  expect_identical(\n    foo(iris, 1:5),\n    names(iris)\n  )\n  expect_identical(\n    foo(iris, is.numeric),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n  expect_identical(\n    foo(iris, is.numeric()),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\")\n  )\n  expect_identical(\n    extract_column_names(iris, sepal.length, ignore_case = TRUE),\n    \"Sepal.Length\"\n  )\n  expect_identical(\n    foo(iris, starts_with(\"Petal\")),\n    c(\"Petal.Length\", \"Petal.Width\")\n  )\n  expect_identical(\n    foo(iris, ends_with(\"Length\")),\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n  expect_identical(\n    foo(iris, contains(\"Length\")),\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n  expect_identical(\n    foo(iris, regex(\"Length$\")),\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n  expect_identical(\n  
  foo(iris, \"Len\", regex = TRUE),\n    c(\"Sepal.Length\", \"Petal.Length\")\n  )\n})\n\n\ntest_that(\".select_nse: arg 'exclude' works\", {\n  expect_identical(\n    foo(iris, exclude = c(\"Petal.Length\", \"Sepal.Width\")),\n    c(\"Sepal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(\n    foo(iris, exclude = c(3, 2)),\n    c(\"Sepal.Length\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(\n    foo(iris, exclude = starts_with(\"Petal\")),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Species\")\n  )\n  expect_identical(\n    foo(iris, exclude = ends_with(\"Length\")),\n    c(\"Sepal.Width\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(\n    foo(iris, exclude = contains(\"Length\")),\n    c(\"Sepal.Width\", \"Petal.Width\", \"Species\")\n  )\n  expect_identical(\n    foo(iris, exclude = regex(\"Length$\")),\n    c(\"Sepal.Width\", \"Petal.Width\", \"Species\")\n  )\n})\n\ntest_that(\".select_nse: args 'select' and 'exclude' at the same time\", {\n  expect_identical(\n    foo(iris, select = contains(\"Length\"), exclude = starts_with(\"Petal\")),\n    \"Sepal.Length\"\n  )\n  expect_identical(\n    foo(iris, select = contains(\"Length\"), exclude = contains(\"Length\")),\n    character(0)\n  )\n})\n\ntest_that(\".select_nse: misc\", {\n  iris2 <- iris[, 1:3]\n  expect_identical(\n    foo(iris, select = names(iris2)),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\")\n  )\n  expect_identical(\n    foo(iris, select = names(iris2)[2]),\n    \"Sepal.Width\"\n  )\n})\n\ntest_that(\".select_nse: works with function and namespace\", {\n  model <- lm(Petal.Length ~ Petal.Width, data = iris)\n  out <- data_select(\n    iris,\n    insight::find_predictors(model, effects = \"fixed\", flatten = TRUE)\n  )\n  expect_identical(out, iris[\"Petal.Width\"])\n\n  fun <- function(x) {\n    data_select(iris, x)\n  }\n  out <- fun(insight::find_predictors(model, effects = \"fixed\", flatten = TRUE))\n  expect_identical(out, 
iris[\"Petal.Width\"])\n\n  x <- \"Sepal.Length\"\n  out <- fun(insight::find_predictors(model, effects = \"fixed\", flatten = TRUE))\n  expect_identical(out, iris[\"Petal.Width\"])\n})\n\ntest_that(\".select_nse: allow character vector with :\", {\n  data(mtcars)\n  out <- data_select(mtcars, c(\"cyl:hp\", \"wt\", \"vs:gear\"))\n  expect_named(out, c(\"cyl\", \"disp\", \"hp\", \"wt\", \"vs\", \"am\", \"gear\"))\n  out <- data_select(mtcars, c(\"cyl:hp\", \"wta\", \"vs:gear\"))\n  expect_named(out, c(\"cyl\", \"disp\", \"hp\", \"vs\", \"am\", \"gear\"))\n  out <- data_select(mtcars, c(\"hp:cyl\", \"wta\", \"vs:gear\"))\n  expect_named(out, c(\"hp\", \"disp\", \"cyl\", \"vs\", \"am\", \"gear\"))\n  out <- data_select(mtcars, c(\"cyl:hq\", \"wt\", \"vs:gear\"))\n  expect_named(out, c(\"wt\", \"vs\", \"am\", \"gear\"))\n\n  expect_warning(\n    center(mtcars, c(\"cyl:hp\", \"wta\", \"vs:gear\"), verbose = TRUE),\n    regex = \"Did you mean \\\"wt\\\"\"\n  )\n  expect_warning(\n    center(mtcars, c(\"cyl:hq\", \"wt\", \"vs:gear\"), verbose = TRUE),\n    regex = \"Did you mean one of \\\"hp\\\"\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-skewness-kurtosis.R",
    "content": "test_that(\"skewness\", {\n  expect_equal(skewness(iris$Sepal.Length)[[1]], 0.314911, tolerance = 1e-3)\n  expect_equal(\n    skewness(iris$Sepal.Length, type = 1)[[1]],\n    0.3117531,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    skewness(iris$Sepal.Length, type = 3)[[1]],\n    0.3086407,\n    tolerance = 1e-3\n  )\n})\n\ntest_that(\"kurtosis\", {\n  expect_equal(kurtosis(iris$Sepal.Length)[[1]], -0.552064, tolerance = 1e-3)\n  expect_equal(\n    kurtosis(iris$Sepal.Length, type = 1)[[1]],\n    -0.5735679,\n    tolerance = 1e-3\n  )\n  expect_equal(\n    kurtosis(iris$Sepal.Length, type = 3)[[1]],\n    -0.6058125,\n    tolerance = 1e-3\n  )\n})\n\ntest_that(\"kurtosis and skewness with bootstrapping\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  expect_equal(\n    skewness(iris$Sepal.Length, iterations = 100)[[2]],\n    0.1262203,\n    tolerance = 1e-3\n  )\n\n  set.seed(123)\n  expect_equal(\n    kurtosis(iris$Sepal.Length, iterations = 100)[[2]],\n    0.1878741,\n    tolerance = 1e-3\n  )\n})\n\ntest_that(\"skewness works with data frames\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  expect_snapshot(skewness(iris[, 1:4]))\n\n  set.seed(123)\n  expect_snapshot(skewness(iris[, 1:4], iterations = 100))\n})\n\ntest_that(\"kurtosis works with data frames\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  expect_snapshot(kurtosis(iris[, 1:4]))\n\n  set.seed(123)\n  expect_snapshot(kurtosis(iris[, 1:4], iterations = 100))\n})\n\n\ntest_that(\"skewness works with matrices\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  expect_snapshot(skewness(as.matrix(iris[, 1:4])))\n\n  set.seed(123)\n  expect_snapshot(skewness(as.matrix(iris[, 1:4]), iterations = 100))\n})\n\ntest_that(\"kurtosis works with matrices\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  expect_snapshot(kurtosis(as.matrix(iris[, 1:4])))\n\n  set.seed(123)\n  expect_snapshot(kurtosis(as.matrix(iris[, 1:4]), iterations = 
100))\n})\n\ntest_that(\"skewness uses type 1 if not enough obs for type 2\", {\n  expect_warning(\n    test <- skewness(c(1, 2), type = \"2\"),\n    \"Need at least 3 complete obs\"\n  )\n  expect_equal(test, skewness(c(1, 2), type = \"1\"))\n})\n\ntest_that(\"kurtosis uses type 1 if not enough obs for type 2\", {\n  expect_warning(\n    test <- kurtosis(c(1, 2, 3), type = \"2\"),\n    \"Need at least 4 complete obs\"\n  )\n  expect_equal(test, kurtosis(c(1, 2, 3), type = \"1\"))\n})\n"
  },
  {
    "path": "tests/testthat/test-smoothness.R",
    "content": "test_that(\"smoothness works\", {\n  set.seed(123)\n  x <- (-10:10)^3 + rnorm(21, 0, 100)\n  expect_equal(smoothness(x)[[1]], 0.9030014, tolerance = 0.001)\n  expect_equal(smoothness(x, method = \"auto\")[[1]], 1.750452, tolerance = 0.001)\n})\n\ntest_that(\"smoothness works with iterations\", {\n  skip_if_not_installed(\"boot\")\n\n  set.seed(123)\n  x <- (-10:10)^3 + rnorm(21, 0, 100)\n  expect_equal(\n    smoothness(x, iterations = 100)[[1]],\n    0.9030014,\n    tolerance = 0.001\n  )\n  expect_equal(\n    smoothness(x, method = \"auto\", iterations = 100)[[1]],\n    1.750452,\n    tolerance = 0.001\n  )\n})\n\n\ntest_that(\"smoothness with lag works\", {\n  set.seed(123)\n  x <- (-10:10)^3 + rnorm(21, 0, 100)\n  expect_equal(smoothness(x, lag = 0.5)[[1]], 0.5859015, tolerance = 0.001)\n  expect_error(smoothness(x, lag = 0), \"'lag' cannot be that small.\")\n})\n\ntest_that(\"smoothness works with data frames\", {\n  skip_if(getRversion() < \"4.0.0\")\n  set.seed(123)\n  expect_snapshot(smoothness(BOD))\n})\n"
  },
  {
    "path": "tests/testthat/test-standardize-data.R",
    "content": "# standardize.numeric -----------------------------------------------------\ntest_that(\"standardize.numeric\", {\n  x <- standardize(seq(0, 1, length.out = 100))\n  expect_equal(mean(x), 0, tolerance = 0.01)\n\n  x <- standardize(seq(0, 1, length.out = 100), two_sd = TRUE)\n  expect_equal(sd(x), 0.5, tolerance = 0.01)\n\n  x <- standardize(seq(0, 1, length.out = 100), robust = TRUE)\n  expect_equal(median(x), 0, tolerance = 0.01)\n\n  x <- standardize(seq(0, 1, length.out = 100), robust = TRUE, two_sd = TRUE)\n  expect_equal(mad(x), 0.5, tolerance = 0.01)\n\n  expect_message(standardize(c(0, 0, 0, 1, 1)))\n\n  x <- standardize(c(-1, 0, 1), reference = seq(3, 4, length.out = 100))\n  expect_equal(mean(x), -11.943, tolerance = 0.01)\n})\n\n\n# standardize factor / Date -----------------------------------------------\ntest_that(\"standardize.numeric\", {\n  f <- factor(c(\"c\", \"a\", \"b\"))\n  expect_identical(standardize(f), f)\n  expect_equal(standardize(f, force = TRUE), c(1, -1, 0), ignore_attr = TRUE)\n\n  d <- as.Date(c(\"1989/08/06\", \"1989/08/04\", \"1989/08/05\"))\n  expect_identical(standardize(d), d)\n  expect_equal(standardize(d, force = TRUE), c(1, -1, 0), ignore_attr = TRUE)\n})\n\n\n# standardize.data.frame --------------------------------------------------\n\ntest_that(\"standardize.data.frame\", {\n  skip_if_not_installed(\"poorman\")\n\n  data(iris)\n  x <- standardize(iris)\n  expect_equal(mean(x$Sepal.Length), 0, tolerance = 0.01)\n  expect_length(levels(x$Species), 3)\n  expect_equal(\n    mean(subset(x, Species == \"virginica\")$Sepal.Length),\n    0.90,\n    tolerance = 0.01\n  )\n\n  # check class attributes\n  expect_identical(\n    vapply(x, class, character(1)),\n    c(\n      Sepal.Length = \"numeric\",\n      Sepal.Width = \"numeric\",\n      Petal.Length = \"numeric\",\n      Petal.Width = \"numeric\",\n      Species = \"factor\"\n    )\n  )\n\n  x2 <- standardize(x = iris[1, ], reference = iris)\n  
 expect_true(all(x2[1, ] == x[1, ]))\n\n  x <- standardize(poorman::group_by(iris, Species))\n  expect_equal(mean(x$Sepal.Length), 0, tolerance = 0.01)\n  expect_length(levels(x$Species), 3)\n  expect_equal(\n    mean(subset(x, Species == \"virginica\")$Sepal.Length),\n    0,\n    tolerance = 0.01\n  )\n})\n\n\ntest_that(\"standardize.data.frame, NAs\", {\n  skip_if_not_installed(\"poorman\")\n\n  data(iris)\n  iris$Sepal.Width[c(148, 65, 33, 58, 54, 93, 114, 72, 32, 23)] <- NA\n  iris$Sepal.Length[c(11, 30, 141, 146, 13, 149, 6, 8, 48, 101)] <- NA\n\n  x <- standardize(iris)\n  expect_equal(\n    head(x$Sepal.Length),\n    c(-0.9163, -1.1588, -1.4013, -1.5226, -1.0376, NA),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width),\n    c(1.0237, -0.151, 0.3189, 0.0839, 1.2586, 1.9635),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length), NA_real_)\n\n  x <- standardize(iris, two_sd = TRUE)\n  expect_equal(\n    head(x$Sepal.Length),\n    c(-0.4603, -0.5811, -0.7019, -0.7623, -0.5207, NA),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width),\n    c(0.5118, -0.0755, 0.1594, 0.042, 0.6293, 0.9817),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length), NA_real_)\n\n  x <- standardize(poorman::group_by(iris, .data$Species))\n  expect_equal(\n    head(x$Sepal.Length),\n    c(0.2547, -0.3057, -0.8661, -1.1463, -0.0255, NA),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width),\n    c(0.2369, -1.0887, -0.5584, -0.8235, 0.502, 1.2974),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length), NA_real_)\n})\n\n\ntest_that(\"standardize.data.frame, append\", {\n  skip_if_not_installed(\"poorman\")\n\n  data(iris)\n  iris$Sepal.Width[c(26, 43, 56, 11, 66, 132, 23, 133, 131, 28)] <- NA\n  iris$Sepal.Length[c(32, 12, 109, 92, 119, 49, 83, 113, 64, 30)] <- NA\n\n  x <- standardize(iris, append = TRUE)\n  expect_identical(\n    colnames(x),\n    c(\n      \"Sepal.Length\",\n      
\"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_z\",\n      \"Sepal.Width_z\",\n      \"Petal.Length_z\",\n      \"Petal.Width_z\"\n    )\n  )\n  expect_equal(\n    head(x$Sepal.Length_z),\n    c(-0.8953, -1.1385, -1.3816, -1.5032, -1.0169, -0.5306),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width_z),\n    c(1.04, -0.1029, 0.3543, 0.1257, 1.2685, 1.9542),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length_z), NA_real_)\n\n  x <- standardize(iris, two_sd = TRUE, append = TRUE)\n  expect_equal(\n    head(x$Sepal.Length_z),\n    c(-0.4477, -0.5692, -0.6908, -0.7516, -0.5084, -0.2653),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width_z),\n    c(0.52, -0.0514, 0.1771, 0.0629, 0.6343, 0.9771),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length_z), NA_real_)\n\n  x <- standardize(poorman::group_by(iris, .data$Species), append = TRUE)\n  expect_equal(\n    head(x$Sepal.Length_z),\n    c(0.2746, -0.2868, -0.8483, -1.129, -0.0061, 1.1168),\n    tolerance = 0.01\n  )\n  expect_equal(\n    head(x$Sepal.Width_z),\n    c(0.1766, -1.1051, -0.5924, -0.8487, 0.4329, 1.2019),\n    tolerance = 0.01\n  )\n  expect_identical(mean(x$Sepal.Length_z), NA_real_)\n})\n\n\ntest_that(\"standardize.data.frame, weights\", {\n  skip_if_not_installed(\"poorman\")\n\n  x <- rexp(30)\n  w <- rpois(30, 20) + 1\n\n  expect_equal(\n    sqrt(cov.wt(cbind(x, x), w)$cov[1, 1]),\n    attr(standardize(x, weights = w), \"scale\"),\n    tolerance = 1e-4\n  )\n  expect_equal(\n    standardize(x, weights = w),\n    standardize(data.frame(x), weights = w)$x,\n    tolerance = 1e-4,\n    ignore_attr = TRUE\n  )\n\n  # name and vector give same results\n  expect_equal(\n    standardize(mtcars, exclude = \"cyl\", weights = mtcars$cyl),\n    standardize(mtcars, weights = \"cyl\"),\n    tolerance = 1e-4\n  )\n\n  d <- poorman::group_by(mtcars, am)\n  expect_warning(standardize(d, weights 
= d$cyl))\n})\n\n\n# Unstandardize -----------------------------------------------------------\ntest_that(\"unstandardize, numeric\", {\n  data(iris)\n  x <- standardize(iris$Petal.Length)\n  rez <- unstandardize(x)\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n\n  rez <- unstandardize(x, reference = iris$Petal.Length)\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n\n  rez <- unstandardize(\n    x,\n    center = mean(iris$Petal.Length),\n    scale = stats::sd(iris$Petal.Length)\n  )\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n\n  rez <- unstandardize(\n    0,\n    center = mean(iris$Petal.Length),\n    scale = stats::sd(iris$Petal.Length)\n  )\n  expect_equal(rez, mean(iris$Petal.Length), tolerance = 1e-3)\n\n  x <- standardize(iris$Petal.Length, robust = TRUE, two_sd = TRUE)\n  rez <- unstandardize(x, robust = TRUE, two_sd = TRUE)\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n\n  x <- scale(iris$Petal.Length)\n  rez <- unstandardize(x)\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n\n  x <- scale(iris$Petal.Length, center = 3, scale = 2)\n  rez <- unstandardize(x)\n  expect_equal(rez, iris$Petal.Length, tolerance = 1e-3, ignore_attr = TRUE)\n})\n\ntest_that(\"unstandardize, data frame\", {\n  skip_if_not_installed(\"poorman\")\n\n  data(iris)\n  x <- standardize(iris)\n  rez <- unstandardize(x)\n  expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE)\n\n  x <- standardize(iris, select = \"Petal.Length\")\n  rez <- unstandardize(x)\n  expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE)\n\n  x <- standardize(iris, select = starts_with(\"Pet\"))\n  rez <- unstandardize(x, select = starts_with(\"Pet\"))\n  expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE)\n\n  x <- standardize(iris, select = \"Petal.Length\")\n  rez <- unstandardize(\n    x,\n    center = c(Petal.Length = 
mean(iris$Petal.Length)),\n    scale = c(Petal.Length = stats::sd(iris$Petal.Length))\n  )\n  expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE)\n\n  expect_error(unstandardize(\n    x,\n    center = mean(iris$Petal.Length),\n    scale = stats::sd(iris$Petal.Length)\n  ))\n\n  x <- standardize(iris)\n  rez <- unstandardize(x, center = rep(0, 4), scale = rep(1, 4))\n  expect_equal(rez, x, tolerance = 0.1, ignore_attr = TRUE)\n\n  data(iris)\n  x <- standardize(iris, robust = TRUE, two_sd = TRUE)\n  rez <- unstandardize(x, robust = TRUE, two_sd = TRUE)\n  expect_equal(rez, iris, tolerance = 0.1, ignore_attr = TRUE)\n})\n\ntest_that(\"un/standardize, matrix\", {\n  set.seed(4)\n  x <- matrix(sample(8), nrow = 4)\n  colnames(x) <- letters[1:2]\n  rownames(x) <- LETTERS[1:4]\n\n  z1 <- standardize(x)\n  z2 <- scale(x)\n\n  expect_equal(z1, z2, ignore_attr = TRUE)\n  expect_equal(unstandardize(z1), x, ignore_attr = TRUE)\n  expect_identical(unstandardize(z2), unstandardize(z1))\n})\n\ntest_that(\"unstandardize with reference (data frame)\", {\n  x <- standardize(x = iris, reference = iris)\n  x2 <- unstandardize(x, reference = iris)\n  expect_equal(x2, iris, ignore_attr = TRUE)\n\n  x <- standardize(x = iris, reference = iris, robust = TRUE)\n  x2 <- unstandardize(x, reference = iris, robust = TRUE)\n  expect_equal(x2, iris, ignore_attr = TRUE)\n})\n\ntest_that(\"unstandardize does nothing with characters and factors\", {\n  expect_identical(\n    unstandardise(c(\"a\", \"b\")),\n    c(\"a\", \"b\")\n  )\n  expect_identical(\n    unstandardise(factor(c(1, 2))),\n    factor(c(1, 2))\n  )\n})\n\n# select helpers ------------------------------\ntest_that(\"standardize regex\", {\n  expect_identical(\n    standardize(mtcars, select = \"pg\", regex = TRUE),\n    standardize(mtcars, select = \"mpg\")\n  )\n})\n\n# standardize when only providing one of center/scale ---------------\ntest_that(\"standardize when only providing one of center/scale\", {\n  x <- 1:10\n  
expect_identical(\n    as.vector(datawizard::standardize(x, center = FALSE)),\n    x / sd(x)\n  )\n  expect_identical(\n    as.vector(datawizard::standardize(x, center = 2)),\n    (x - 2) / sd(x)\n  )\n  expect_identical(\n    as.vector(datawizard::standardize(x, scale = FALSE)),\n    as.vector(datawizard::center(x))\n  )\n  expect_identical(\n    as.vector(datawizard::standardize(x, scale = 1.5)),\n    (x - mean(x)) / 1.5\n  )\n})\n\n\n# grouped data\n\ntest_that(\"unstandardize: grouped data\", {\n  skip_if_not_installed(\"poorman\")\n\n  # 1 group, 1 standardized var\n  stand <- poorman::group_by(mtcars, cyl)\n  stand <- standardize(stand, \"mpg\")\n  unstand <- unstandardize(stand, select = \"mpg\")\n  expect_identical(\n    poorman::ungroup(unstand),\n    mtcars,\n    ignore_attr = TRUE\n  )\n\n  expect_s3_class(unstand, \"grouped_df\")\n\n  # 2 groups, 1 standardized var\n  set.seed(123)\n  test <- iris\n  test$grp <- sample(c(\"A\", \"B\"), nrow(test), replace = TRUE)\n  stand <- poorman::group_by(test, Species, grp)\n  stand <- standardize(stand, \"Sepal.Length\")\n  expect_identical(\n    poorman::ungroup(unstandardize(stand, select = \"Sepal.Length\")),\n    test\n  )\n\n  # 2 groups, 2 standardized vars\n  set.seed(123)\n  test <- iris\n  test$grp <- sample(c(\"A\", \"B\"), nrow(test), replace = TRUE)\n  stand <- poorman::group_by(test, Species, grp)\n  stand <- standardize(stand, c(\"Sepal.Length\", \"Petal.Length\"))\n  expect_identical(\n    poorman::ungroup(unstandardize(\n      stand,\n      select = c(\"Sepal.Length\", \"Petal.Length\")\n    )),\n    test\n  )\n\n  expect_s3_class(unstand, \"grouped_df\")\n\n  # can't recover attributes\n  stand <- poorman::group_by(iris, Species)\n  stand <- standardize(stand, \"Sepal.Length\")\n  attr(stand, \"groups\") <- NULL\n\n  expect_error(\n    unstandardize(stand, \"Sepal.Length\"),\n    regexp = \"Couldn't retrieve the necessary information\"\n  )\n\n  # standardize applied on grouped data but 
unstandardize applied on ungrouped data\n  stand <- poorman::group_by(mtcars, cyl)\n  stand <- standardize(stand, \"mpg\")\n  stand <- poorman::ungroup(stand)\n\n  expect_error(\n    unstandardize(stand, \"mpg\"),\n    regexp = \"must provide the arguments\"\n  )\n\n  # standardize applied on grouped data but unstandardize applied different grouped\n  # data\n  stand <- poorman::group_by(stand, am)\n  expect_error(\n    unstandardize(stand, \"mpg\"),\n    regexp = \"Couldn't retrieve the necessary\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-standardize_datagrid.R",
    "content": "# standardize -----------------------------------------------------\ntest_that(\"standardize.datagrid\", {\n  x <- insight::get_datagrid(\n    iris,\n    by = \"Sepal.Length\",\n    range = \"sd\",\n    length = 3\n  )\n  out <- standardize(x)\n  expect_identical(as.numeric(out$Sepal.Length), c(-1, 0, 1), tolerance = 1e-3)\n  expect_identical(as.numeric(out$Sepal.Width), c(0, 0, 0), tolerance = 1e-3)\n\n  x <- insight::get_datagrid(iris, by = \"Sepal.Length = c(-1, 0)\")\n  out <- unstandardize(x, select = \"Sepal.Length\")\n  expect_identical(\n    out$Sepal.Length[1:2],\n    c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length)),\n    tolerance = 1e-3\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-standardize_models.R",
    "content": "# standardize.lm ----------------------------------------------------------\ntest_that(\"standardize.lm\", {\n  iris2 <- na.omit(iris)\n  iris_z <- standardize(iris2)\n\n  m0 <- lm(Sepal.Length ~ Species * Petal.Width, data = iris_z)\n  m1 <- lm(Sepal.Length ~ Species * Petal.Width, data = iris2)\n  model <- standardize(m1)\n  expect_identical(coef(m0), coef(model))\n})\n\ntest_that(\"standardize, mlm\", {\n  m <- lm(cbind(mpg, hp) ~ cyl + am, data = mtcars)\n  m2 <- lm(scale(cbind(mpg, hp)) ~ scale(cyl) + scale(am), data = mtcars)\n\n  mz <- standardize(m)\n  expect_equal(coef(mz), coef(m2), ignore_attr = TRUE, tolerance = 1e-4)\n})\n\ntest_that(\"standardize | errors\", {\n  my_lm_external_formula <- function(.dat, predicted, predictor) {\n    my_formula <- as.formula(paste0(predicted, \"~\", predictor))\n    lm(formula = my_formula, data = .dat)\n  }\n\n  m <- my_lm_external_formula(mtcars, \"mpg\", \"am\")\n  ers <- capture_error(standardize(m))\n  expect_match(\n    as.character(ers),\n    \"Try instead to standardize the data\",\n    fixed = TRUE\n  )\n})\n\n\ntest_that(\"standardize | problematic formulas\", {\n  data(mtcars)\n  m <- lm(mpg ~ hp, data = mtcars)\n  expect_equal(\n    coef(standardise(m)),\n    c(`(Intercept)` = -3.14935717633686e-17, hp = -0.776168371826586),\n    tolerance = 1e-4\n  )\n\n  colnames(mtcars)[1] <- \"1_mpg\"\n  m <- lm(`1_mpg` ~ hp, data = mtcars)\n  expect_error(standardise(m), regex = \"Looks like\")\n\n  # works interactive only\n  # data(mtcars)\n  # m <- lm(mtcars$mpg ~ mtcars$hp)\n  # expect_error(standardise(m), regex = \"model formulas\")\n\n  m <- lm(mtcars[, 1] ~ hp, data = mtcars)\n  expect_error(standardise(m), regex = \"indexed data\")\n})\n\n\n# Transformations ---------------------------------------------------------\ntest_that(\"transformations\", {\n  skip_if_not_installed(\"effectsize\")\n  # deal with log / sqrt terms\n  expect_message(standardize(lm(mpg ~ sqrt(cyl) + log(hp), mtcars)))\n  
expect_message(standardize(lm(mpg ~ sqrt(cyl), mtcars)))\n  expect_message(standardize(lm(mpg ~ log(hp), mtcars)))\n\n  # difference between stand-methods:\n  mt <- mtcars\n  mt$hp_100 <- mt$hp / 100\n  fit_exp <- lm(mpg ~ exp(hp_100), mt)\n  fit_scale1 <- lm(scale(mpg) ~ exp(scale(hp_100)), mt)\n  fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt)\n  expect_equal(\n    effectsize::standardize_parameters(fit_exp, method = \"refit\")[2, 2],\n    unname(coef(fit_scale1)[2]),\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    effectsize::standardize_parameters(fit_exp, method = \"basic\")[2, 2],\n    unname(coef(fit_scale2)[2]),\n    ignore_attr = TRUE\n  )\n\n  # fmt: skip\n  d <- data.frame(\n    time = factor(rep(c(\"1\", \"2\", \"3\", \"4\", \"5\"), 6)),\n    group = rep(rep(c(1, 2), 3), each = 5L),\n    sum = rep(c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50), 3)\n  )\n  m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d)\n\n  expect_message({\n    out <- standardize(m)\n  })\n  expect_identical(\n    coef(m),\n    c(\n      `(Intercept)` = -0.4575,\n      `as.numeric(time)` = 0.5492,\n      group = 0.3379,\n      `as.numeric(time):group` = 0.15779\n    ),\n    tolerance = 0.01\n  )\n})\n\n\n# W/ weights --------------------------------------------------------------\ntest_that(\"weights\", {\n  expect_warning(standardize(mtcars, weights = \"xx\"))\n\n  m <- lm(mpg ~ wt + hp, weights = cyl, mtcars)\n\n  sm <- standardize(m, weights = TRUE)\n  sm_data <- insight::get_data(sm, source = \"frame\")\n  sm_data2 <- standardize(\n    mtcars,\n    select = c(\"mpg\", \"wt\", \"hp\"),\n    weights = \"cyl\"\n  )\n  expect_identical(\n    sm_data[, c(\"mpg\", \"wt\", \"hp\")],\n    sm_data2[, c(\"mpg\", \"wt\", \"hp\")]\n  )\n\n  expect_error(standardize(m, weights = TRUE, robust = TRUE), NA)\n\n  # no weights in stding\n  sm_xw <- standardize(m, weights = FALSE)\n  sm_data_xw <- insight::get_data(sm_xw, source = \"frame\")\n  expect_false(isTRUE(all.equal(coef(sm)[-1], 
coef(sm_xw)[-1])))\n\n  skip_if_not_installed(\"effectsize\")\n  # refit and posthoc should give same results\n  stdREFIT <- effectsize::standardize_parameters(m, method = \"refit\")\n  expect_equal(\n    stdREFIT[[2]],\n    effectsize::standardize_parameters(m, method = \"posthoc\")[[2]],\n    ignore_attr = TRUE\n  )\n\n  expect_equal(\n    stdREFIT[[2]],\n    effectsize::standardize_parameters(m, method = \"basic\")[[2]],\n    ignore_attr = TRUE\n  )\n})\n\n\n# weights + missing data --------------------------------------------------\ntest_that(\"weights + NA\", {\n  set.seed(1234)\n  data(iris)\n\n  # data setup\n  iris$weight_me <- runif(nrow(iris))\n  iris$Sepal.Length[sample(nrow(iris), size = 10)] <- NA\n  iris$weight_me[sample(nrow(iris), size = 10)] <- NA\n\n  # standardize 2nd data set\n  iris2 <- standardize(\n    iris,\n    select = c(\"Sepal.Length\", \"Petal.Width\"),\n    remove_na = \"all\"\n  )\n  iris3 <- standardize(\n    iris,\n    select = c(\"Sepal.Length\", \"Petal.Width\"),\n    weights = \"weight_me\",\n    remove_na = \"selected\"\n  )\n\n  m1 <- lm(\n    Sepal.Length ~ Species + Petal.Width,\n    data = iris,\n    weights = weight_me\n  )\n\n  # weights, missing data, but data isn't weight-stdized\n  m2 <- lm(\n    Sepal.Length ~ Species + Petal.Width,\n    data = iris2,\n    weights = weight_me\n  )\n  sm2 <- standardize(m1, weights = FALSE)\n  expect_identical(coef(m2), coef(sm2))\n\n  # weights, missing data, and data is weight-stdized\n  m3 <- lm(\n    Sepal.Length ~ Species + Petal.Width,\n    data = iris3,\n    weights = weight_me\n  )\n  sm3 <- standardize(m1, weights = TRUE)\n  expect_identical(coef(m3), coef(sm3))\n})\n\n\n# weights + missing data ´+ na.action = na.exclude --------------------------------------------------\ntest_that(\"weights + NA + na.exclude\", {\n  skip_if_not_installed(\"effectsize\")\n  set.seed(1234)\n  data(iris)\n\n  # data setup\n  iris$weight_me <- runif(nrow(iris))\n  
iris$Sepal.Length[sample(nrow(iris), size = 25)] <- NA\n  iris$weight_me[sample(nrow(iris), size = 15)] <- NA\n  d <- iris\n\n  m1 <- lm(\n    Sepal.Length ~ Species + Petal.Width,\n    data = d,\n    weights = weight_me,\n    na.action = na.exclude\n  )\n  m2 <- lm(Sepal.Length ~ Species + Petal.Width, data = d, weights = weight_me)\n\n  expect_identical(\n    coef(standardize(m2)),\n    coef(standardize(m1)),\n    tolerance = 1e-3\n  )\n  expect_identical(\n    effectsize::standardize_parameters(m1, method = \"basic\")[[2]],\n    effectsize::standardize_parameters(m2, method = \"basic\")[[2]],\n    tolerance = 1e-3\n  )\n})\n\n# subset ------------------\ntest_that(\"fail with subset\", {\n  data(\"mtcars\")\n\n  mod1 <- lm(mpg ~ hp, data = mtcars, subset = cyl > 4)\n\n  expect_error(standardise(mod1), regexp = \"subset\")\n})\n\n\n# don't standardize non-Gaussian response ------------------------------------\ntest_that(\"standardize non-Gaussian response\", {\n  skip_on_cran()\n  skip_if_not_installed(\"lme4\")\n  set.seed(1234)\n  data(sleepstudy, package = \"lme4\")\n\n  m1 <- glm(Reaction ~ Days, family = Gamma(), data = sleepstudy)\n  m2 <- glm(\n    Reaction ~ Days,\n    family = Gamma(link = \"identity\"),\n    data = sleepstudy\n  )\n  m3 <- glm(Reaction ~ Days, family = inverse.gaussian(), data = sleepstudy)\n\n  expect_identical(\n    coef(standardize(m1)),\n    c(`(Intercept)` = 0.00338, Days = -0.00034),\n    tolerance = 1e-2\n  )\n  expect_identical(\n    coef(standardize(m2)),\n    c(`(Intercept)` = 298.48571, Days = 29.70754),\n    tolerance = 1e-3\n  )\n  expect_identical(\n    coef(standardize(m3)),\n    c(`(Intercept)` = 1e-05, Days = 0),\n    tolerance = 1e-3\n  )\n})\n\n\n# variables evaluated in the environment $$$ ------------------------------\ntest_that(\"variables evaluated in the environment\", {\n  m <- lm(mtcars$mpg ~ mtcars$cyl + am, data = mtcars)\n  w <- capture_error(standardize(m))\n  expect_true(any(grepl(\"Using `$`\", w, fixed 
= TRUE)))\n\n  ## Note:\n  # No idea why this is suddenly not giving a warning on older R versions.\n  m <- lm(mtcars$mpg ~ mtcars$cyl + mtcars$am, data = mtcars)\n  w <- capture_error(standardize(m))\n  expect_true(any(grepl(\"Using `$`\", w, fixed = TRUE)))\n})\n\n\n# mediation models --------------------------------------------------------\ntest_that(\"standardize mediation\", {\n  skip_on_cran()\n  skip_if_not_installed(\"mediation\")\n  set.seed(444)\n  data(jobs, package = \"mediation\")\n  jobs$econ_hard <- jobs$econ_hard * 20\n  b.int <- lm(job_seek ~ treat * age + econ_hard + sex, data = jobs)\n  d.int <- lm(depress2 ~ treat * job_seek * age + econ_hard + sex, data = jobs)\n\n  med1 <- mediation::mediate(\n    b.int,\n    d.int,\n    sims = 200,\n    treat = \"treat\",\n    mediator = \"job_seek\"\n  )\n  med2 <- mediation::mediate(\n    b.int,\n    d.int,\n    sims = 200,\n    treat = \"treat\",\n    mediator = \"job_seek\",\n    covariates = list(age = mean(jobs$age))\n  )\n\n  out1 <- summary(standardize(med1))\n  expect_message({\n    out2 <- summary(standardize(med2))\n  })\n  expect_identical(\n    unlist(out1[c(\"d0\", \"d1\", \"z0\", \"z1\", \"n0\", \"n1\", \"tau.coef\")]),\n    unlist(out2[c(\"d0\", \"d1\", \"z0\", \"z1\", \"n0\", \"n1\", \"tau.coef\")]),\n    tolerance = 0.1\n  )\n\n  med0 <- mediation::mediate(\n    standardize(b.int),\n    standardize(d.int),\n    sims = 200,\n    treat = \"treat\",\n    mediator = \"job_seek\"\n  )\n  out0 <- summary(med0)\n  medz <- standardize(mediation::mediate(\n    b.int,\n    d.int,\n    sims = 200,\n    treat = \"treat\",\n    mediator = \"job_seek\"\n  ))\n  outz <- summary(medz)\n  expect_identical(\n    unlist(out0[c(\"d0\", \"d1\", \"z0\", \"z1\", \"n0\", \"n1\", \"tau.coef\")]),\n    unlist(outz[c(\"d0\", \"d1\", \"z0\", \"z1\", \"n0\", \"n1\", \"tau.coef\")]),\n    tolerance = 0.1\n  )\n})\n\n# Offsets -----------------------------------------------------------------\n\ntest_that(\"offsets\", {\n  
skip_if_not_installed(\"effectsize\")\n  skip_if_not_installed(\"parameters\")\n\n  m <- lm(mpg ~ hp + offset(wt), data = mtcars)\n\n  expect_warning({\n    mz1 <- standardize(m)\n  })\n  expect_warning({\n    mz2 <- standardize(m, two_sd = TRUE)\n  })\n  expect_identical(c(1, 2) * coef(mz1), coef(mz2))\n\n  m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars)\n  expect_warning(\n    {\n      mz <- standardize(m)\n    },\n    regexp = NA\n  )\n\n  par1 <- parameters::model_parameters(mz)\n  par2 <- effectsize::standardize_parameters(m, method = \"basic\")\n  expect_identical(par2[2, 2], par1[2, 2], tolerance = 0.05)\n})\n\n\n# BRMS --------------------------------------------------------------------\n\ntest_that(\"brms\", {\n  skip_on_cran()\n  skip_on_os(c(\"windows\", \"mac\"))\n  skip_if_not_installed(\"brms\")\n  skip_if_not_installed(\"RcppEigen\")\n  skip_if_not_installed(\"BH\")\n\n  invisible(\n    capture.output({\n      mod <- brms::brm(\n        mpg ~ hp,\n        data = mtcars,\n        refresh = 0,\n        chains = 1,\n        silent = 2\n      )\n    })\n  )\n\n  expect_warning(\n    standardize(mod),\n    regexp = \"without adjusting priors may lead to bogus\"\n  )\n})\n\n# fixest --------------------------------------------------------------------\n\ntest_that(\"fixest\", {\n  skip_if_not_installed(\"fixest\")\n\n  mtcars_stand <- standardize(mtcars)\n  orig <- fixest::feols(\n    drat ~ mpg + hp^2 | cyl + am,\n    data = mtcars,\n    se = \"hetero\"\n  )\n  # TODO: Remove this suppressWarnings() when a new version of `fixest` that\n  # contains the fix for https://github.com/lrberge/fixest/issues/618 is on CRAN\n  # (CRAN version is 0.13.2 at the time of writing).\n  suppressWarnings({\n    auto_stand <- standardize(orig)\n  })\n  manual_stand <- fixest::feols(\n    drat ~ mpg + hp^2 | cyl + am,\n    data = mtcars_stand,\n    se = \"hetero\"\n  )\n\n  # Need to unname because I(hp^2) in the manual one becomes I(I(hp ^2)) in the\n  # 
automated one.\n  expect_identical(\n    unname(auto_stand$coefficients),\n    unname(manual_stand$coefficients)\n  )\n  expect_identical(unname(auto_stand$se), unname(manual_stand$se))\n\n  ### Inform the user if some terms are log() or sqrt()\n  orig <- fixest::feols(\n    drat ~ log(mpg) | cyl + am,\n    data = mtcars\n  )\n  # TODO: same as above\n  expect_message(\n    suppressWarnings(standardize(orig)),\n    \"Formula contains log- or sqrt-terms\"\n  )\n  orig <- fixest::feols(\n    drat ~ sqrt(mpg) | cyl + am,\n    data = mtcars\n  )\n  # TODO: same as above\n  expect_message(\n    suppressWarnings(standardize(orig)),\n    \"Formula contains log- or sqrt-terms\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-std_center.R",
    "content": "d <- iris[1:4, ]\n\n# standardize -----------------------------------------------------\ntest_that(\"standardize.data.frame\", {\n  x <- standardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"))\n  expect_equal(\n    as.vector(x$Sepal.Length),\n    as.vector(scale(d$Sepal.Length)),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Petal.Length),\n    as.vector(d$Petal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    colnames(x),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  x <- standardise(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = TRUE)\n  expect_equal(\n    as.vector(x$Sepal.Length_z),\n    as.vector(scale(d$Sepal.Length)),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Sepal.Length),\n    as.vector(d$Sepal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Petal.Length),\n    as.vector(d$Petal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    colnames(x),\n    c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_z\",\n      \"Sepal.Width_z\"\n    )\n  )\n})\n\ntest_that(\"standardize other classes\", {\n  d <- data.frame(\n    a = 1:5,\n    b = factor(letters[1:5]),\n    c = as.Date(c(\n      \"2022-03-22\",\n      \"2022-01-02\",\n      \"2022-02-02\",\n      \"2021-04-02\",\n      \"2020-01-19\"\n    )),\n    d = c(TRUE, TRUE, FALSE, FALSE, TRUE),\n    e = as.complex(1:5)\n  )\n\n  x <- standardize(d$a)\n  expect_equal(\n    as.numeric(x),\n    c(-1.26491, -0.63246, 0, 0.63246, 1.26491),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- standardize(d$b)\n  expect_equal(as.numeric(x), 1:5, tolerance = 1e-3, ignore_attr = TRUE)\n  x <- standardize(d$b, force = TRUE)\n  expect_equal(\n    as.numeric(x),\n    c(-1.26491, -0.63246, 0, 0.63246, 1.26491),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- 
standardize(d$c)\n  expect_equal(\n    x,\n    as.Date(\n      c(\n        \"2022-03-22\",\n        \"2022-01-02\",\n        \"2022-02-02\",\n        \"2021-04-02\",\n        \"2020-01-19\"\n      )\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- standardize(d$c, force = TRUE)\n  expect_equal(\n    as.numeric(x),\n    c(0.76992, 0.53121, 0.62488, -0.29975, -1.62626),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- standardize(d$d)\n  expect_equal(\n    x,\n    c(TRUE, TRUE, FALSE, FALSE, TRUE),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_message(x <- standardize(d$d, force = TRUE))\n  expect_equal(\n    x,\n    c(0.7303, 0.7303, -1.09545, -1.09545, 0.7303),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n\n  ## TODO conflict with standardize.default() in effectsize\n\n  expect_warning(\n    x <- standardize(d$e),\n    \"cannot be standardized\"\n  )\n  expect_equal(x, d$e, tolerance = 1e-3, ignore_attr = TRUE)\n})\n\n\n# center -----------------------------------------------------\ntest_that(\"center.data.frame\", {\n  x <- center(d, select = c(\"Sepal.Length\", \"Sepal.Width\"))\n  expect_equal(\n    as.vector(x$Sepal.Length),\n    as.vector(d$Sepal.Length - mean(d$Sepal.Length)),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Petal.Length),\n    as.vector(d$Petal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    colnames(x),\n    c(\"Sepal.Length\", \"Sepal.Width\", \"Petal.Length\", \"Petal.Width\", \"Species\")\n  )\n\n  x <- center(d, select = c(\"Sepal.Length\", \"Sepal.Width\"), append = TRUE)\n  expect_equal(\n    as.vector(x$Sepal.Length_c),\n    as.vector(d$Sepal.Length - mean(d$Sepal.Length)),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Sepal.Length),\n    as.vector(d$Sepal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$Petal.Length),\n    as.vector(d$Petal.Length),\n    tolerance = 0.001\n  )\n  expect_equal(\n    colnames(x),\n    
c(\n      \"Sepal.Length\",\n      \"Sepal.Width\",\n      \"Petal.Length\",\n      \"Petal.Width\",\n      \"Species\",\n      \"Sepal.Length_c\",\n      \"Sepal.Width_c\"\n    )\n  )\n})\n\ntest_that(\"center other classes\", {\n  d <- data.frame(\n    a = 1:5,\n    b = factor(letters[1:5]),\n    c = as.Date(c(\n      \"2022-03-22\",\n      \"2022-01-02\",\n      \"2022-02-02\",\n      \"2021-04-02\",\n      \"2020-01-19\"\n    )),\n    d = c(TRUE, TRUE, FALSE, FALSE, TRUE),\n    e = as.complex(1:5)\n  )\n\n  x <- center(d$a)\n  expect_equal(\n    as.numeric(x),\n    c(-2, -1, 0, 1, 2),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- center(d$b)\n  expect_equal(as.numeric(x), 1:5, tolerance = 1e-3, ignore_attr = TRUE)\n  x <- center(d$b, force = TRUE)\n  expect_equal(\n    as.numeric(x),\n    c(-2, -1, 0, 1, 2),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- center(d$c)\n  expect_equal(\n    x,\n    as.Date(\n      c(\n        \"2022-03-22\",\n        \"2022-01-02\",\n        \"2022-02-02\",\n        \"2021-04-02\",\n        \"2020-01-19\"\n      )\n    ),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- center(d$c, force = TRUE)\n  expect_equal(\n    as.numeric(x),\n    c(254.8, 175.8, 206.8, -99.2, -538.2),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  x <- center(d$d)\n  expect_equal(\n    x,\n    c(TRUE, TRUE, FALSE, FALSE, TRUE),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_message(x <- center(d$d, force = TRUE))\n  expect_equal(\n    x,\n    c(0.4, 0.4, -0.6, -0.6, 0.4),\n    tolerance = 1e-3,\n    ignore_attr = TRUE\n  )\n  expect_message(x <- center(d$e))\n  expect_equal(x, d$e, tolerance = 1e-3, ignore_attr = TRUE)\n})\n"
  },
  {
    "path": "tests/testthat/test-std_center_scale_args.R",
    "content": "d <- data.frame(a = 1:5, b = 21:25, c = 41:45)\n\ntest_that(\"standardize\", {\n  x <- standardize(d)\n  expect_equal(as.vector(x$a), as.vector(scale(d$a)), tolerance = 0.001)\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(d, center = 5, scale = 2)\n  expect_equal(as.vector(x$a), c(-2, -1.5, -1, -0.5, 0), tolerance = 0.001)\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(d, center = c(5, 25, 45), scale = c(3, 3, 3))\n  expect_equal(\n    as.vector(x$a),\n    c(-1.33333, -1, -0.66667, -0.33333, 0),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$b),\n    c(-1.33333, -1, -0.66667, -0.33333, 0),\n    tolerance = 0.001\n  )\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(d, center = c(c = 45, a = 5, b = 25), scale = c(3, 3, 3))\n  expect_equal(\n    as.vector(x$a),\n    c(-1.33333, -1, -0.66667, -0.33333, 0),\n    tolerance = 0.001\n  )\n  expect_equal(\n    as.vector(x$b),\n    c(-1.33333, -1, -0.66667, -0.33333, 0),\n    tolerance = 0.001\n  )\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(d, center = c(c = 45, a = 5, b = 25), scale = c(1, 2, 3))\n  expect_equal(as.vector(x$a), c(-4, -3, -2, -1, 0), tolerance = 0.001)\n  expect_equal(as.vector(x$b), c(-2, -1.5, -1, -0.5, 0), tolerance = 0.001)\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(\n    d,\n    center = c(c = 45, a = 5, b = 25),\n    scale = c(a = 1, b = 2, c = 3)\n  )\n  expect_equal(as.vector(x$a), c(-4, -3, -2, -1, 0), tolerance = 0.001)\n  expect_equal(as.vector(x$b), c(-2, -1.5, -1, -0.5, 0), tolerance = 0.001)\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(\n    d,\n    center = c(c = 45, a = 5, b = 25),\n    scale = c(c = 3, b = 2, a = 1)\n  )\n  expect_equal(as.vector(x$a), c(-4, -3, -2, -1, 0), tolerance = 0.001)\n  expect_equal(as.vector(x$b), c(-2, -1.5, -1, -0.5, 0), tolerance = 0.001)\n})\n\ntest_that(\"standardize\", {\n  x <- standardize(\n    d,\n    center = c(c = 45, a = 5, b = 25),\n    scale = c(c = 1, b = 
2, a = 3)\n  )\n  expect_equal(\n    as.vector(x$a),\n    c(-1.33333, -1, -0.66667, -0.33333, 0),\n    tolerance = 0.001\n  )\n  expect_equal(as.vector(x$b), c(-2, -1.5, -1, -0.5, 0), tolerance = 0.001)\n})\n"
  },
  {
    "path": "tests/testthat/test-text_format.R",
    "content": "test_that(\"text formatting helpers work as expected\", {\n  expect_snapshot(text_format(\n    c(\n      \"A very long First\",\n      \"Some similar long Second\",\n      \"Shorter Third\",\n      \"More or less long Fourth\",\n      \"And finally the Last\"\n    ),\n    width = 20\n  ))\n\n  expect_snapshot(text_format(\n    c(\n      \"A very long First\",\n      \"Some similar long Second\",\n      \"Shorter Third\",\n      \"More or less long Fourth\",\n      \"And finally the Last\"\n    ),\n    last = \" or \",\n    enclose = \"`\",\n    width = 20\n  ))\n\n  expect_identical(\n    text_fullstop(c(\"something\", \"something else.\")),\n    c(\"something.\", \"something else.\")\n  )\n\n  expect_identical(\n    text_lastchar(c(\"ABC\", \"DEF\"), n = 2),\n    c(\"BC\", \"EF\"),\n    ignore_attr = TRUE\n  )\n\n  expect_identical(\n    text_concatenate(c(\"First\", \"Second\")),\n    \"First and Second\"\n  )\n\n  expect_identical(\n    text_concatenate(\"First\"),\n    \"First\"\n  )\n\n  expect_identical(\n    text_concatenate(c(\"First\", \"Second\", \"Last\")),\n    \"First, Second and Last\"\n  )\n\n  expect_identical(\n    text_concatenate(\n      c(\"First\", \"Second\", \"Last\"),\n      last = \" or \",\n      enclose = \"`\"\n    ),\n    \"`First`, `Second` or `Last`\"\n  )\n\n  expect_identical(\n    text_remove(c(\"one!\", \"two\", \"three!\"), \"!\"),\n    c(\"one\", \"two\", \"three\")\n  )\n\n  expect_identical(\n    text_paste(c(\"A\", \"\", \"B\"), c(\"42\", \"42\", \"42\")),\n    c(\"A, 42\", \"42\", \"B, 42\")\n  )\n\n  expect_identical(\n    text_paste(c(\"A\", \"\", \"B\"), c(\"42\", \"42\", \"42\"), enclose = \"`\"),\n    c(\"`A`, `42`\", \"`42`\", \"`B`, `42`\")\n  )\n})\n\ntest_that(\"text formatters respect `width` argument\", {\n  expect_snapshot({\n    long_text <- strrep(\"abc \", 100)\n    cat(text_format(long_text, width = 50))\n    cat(text_format(long_text, width = 80))\n\n    withr::with_options(list(width = 50), 
code = {\n      cat(text_format(long_text))\n    })\n  })\n})\n"
  },
  {
    "path": "tests/testthat/test-unnormalize.R",
    "content": "test_that(\"unnormalize work as expected\", {\n  x <- normalize(c(0, 1, 5, -5, -2))\n  expect_equal(\n    unnormalize(x),\n    c(0, 1, 5, -5, -2),\n    ignore_attr = TRUE\n  )\n  expect_error(\n    unnormalize(c(0, 1, 5, -5, -2)),\n    \"Can't unnormalize variable\"\n  )\n})\n\ntest_that(\"unnormalize error if not supported\", {\n  expect_error(\n    unnormalize(c(\"a\", \"b\")),\n    \"can't be unnormalized\"\n  )\n})\n\ntest_that(\"unnormalize and unstandardized x 4\", {\n  set.seed(123)\n  x <- rnorm(6, 4, 10)\n\n  z <- standardise(x)\n  expect_named(attributes(z), c(\"center\", \"scale\", \"robust\", \"class\"))\n  expect_equal(attributes(z)$center, 8.47, tolerance = 0.01)\n  expect_equal(unstandardise(z), x, ignore_attr = TRUE)\n\n  z <- center(x)\n  expect_named(attributes(z), c(\"center\", \"scale\", \"robust\", \"class\"))\n  expect_equal(unstandardise(z), x, ignore_attr = TRUE)\n\n  z <- normalize(x)\n  expect_named(\n    attributes(z),\n    c(\n      \"include_bounds\",\n      \"flag_bounds\",\n      \"min_value\",\n      \"vector_length\",\n      \"range_difference\",\n      \"class\"\n    )\n  )\n  expect_equal(unnormalize(z), x, ignore_attr = TRUE)\n\n  z <- change_scale(x, to = c(-3, 14.5))\n  expect_named(\n    attributes(z),\n    c(\n      \"min_value\",\n      \"max_value\",\n      \"new_min\",\n      \"new_max\",\n      \"range_difference\",\n      \"to_range\",\n      \"class\"\n    )\n  )\n  expect_equal(unnormalize(z), x, ignore_attr = TRUE)\n\n  z <- change_scale(x, range = c(-100, 100))\n  expect_named(\n    attributes(z),\n    c(\n      \"min_value\",\n      \"max_value\",\n      \"new_min\",\n      \"new_max\",\n      \"range_difference\",\n      \"to_range\",\n      \"class\"\n    )\n  )\n  expect_equal(unnormalize(z), x, ignore_attr = TRUE)\n})\n\n# select helpers ------------------------------\ntest_that(\"unnormalize regex\", {\n  x <- normalize(mtcars, select = \"mpg\")\n  expect_identical(\n    unnormalize(x, select = 
\"pg\", regex = TRUE),\n    unnormalize(x, select = \"mpg\")\n  )\n})\n\n\ntest_that(\"unnormalize: grouped data\", {\n  skip_if_not_installed(\"poorman\")\n\n  # 1 group, 1 normalized var\n  norm <- poorman::group_by(mtcars, cyl)\n  norm <- normalize(norm, \"mpg\")\n  expect_identical(\n    poorman::ungroup(unnormalize(norm, \"mpg\")),\n    mtcars,\n    ignore_attr = TRUE # unnormalize removed rownames\n  )\n\n  # 2 groups, 1 normalized var\n  set.seed(123)\n  test <- iris\n  test$grp <- sample(c(\"A\", \"B\"), nrow(test), replace = TRUE)\n  norm <- poorman::group_by(test, Species, grp)\n  norm <- normalize(norm, \"Sepal.Length\")\n  expect_identical(\n    poorman::ungroup(unnormalize(norm, \"Sepal.Length\")),\n    test\n  )\n\n  # 2 groups, 2 normalized vars\n  set.seed(123)\n  test <- iris\n  test$grp <- sample(c(\"A\", \"B\"), nrow(test), replace = TRUE)\n  norm <- poorman::group_by(test, Species, grp)\n  norm <- normalize(norm, c(\"Sepal.Length\", \"Petal.Length\"))\n  unnorm <- unnormalize(norm, c(\"Sepal.Length\", \"Petal.Length\"))\n  expect_identical(\n    poorman::ungroup(unnorm),\n    test\n  )\n\n  expect_s3_class(unnorm, \"grouped_df\")\n\n  # can't recover attributes\n  norm <- poorman::group_by(iris, Species)\n  norm <- normalize(norm, \"Sepal.Length\")\n  attr(norm, \"groups\") <- NULL\n\n  expect_error(\n    unnormalize(norm, \"Sepal.Length\"),\n    regexp = \"Couldn't retrieve the necessary information\"\n  )\n\n  # normalize applied on grouped data but unnormalize applied on ungrouped data\n  norm <- poorman::group_by(mtcars, cyl)\n  norm <- normalize(norm, \"mpg\")\n  norm <- poorman::ungroup(norm)\n\n  expect_error(\n    unnormalize(norm, \"mpg\"),\n    regexp = \"Can't unnormalize variable\"\n  )\n\n  # normalize applied on grouped data but unnormalize applied different grouped\n  # data\n  norm <- poorman::group_by(norm, am)\n  expect_error(\n    unnormalize(norm, \"mpg\"),\n    regexp = \"Couldn't retrieve the necessary\"\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-utils.R",
    "content": "test_that(\".coerce_to_dataframe works for matrices\", {\n  mat <- matrix(c(1, 2, 3, 11, 12, 13), nrow = 2, ncol = 3, byrow = TRUE)\n\n  expect_equal(\n    .coerce_to_dataframe(mat),\n    data.frame(\n      V1 = c(1, 11),\n      V2 = c(2, 12),\n      V3 = c(3, 13)\n    )\n  )\n})\n\ntest_that(\".coerce_to_dataframe works for vectors and list\", {\n  expect_equal(\n    .coerce_to_dataframe(1:3),\n    data.frame(data = 1:3)\n  )\n\n  expect_equal(\n    .coerce_to_dataframe(c(\"a\", \"b\", \"c\")),\n    data.frame(data = c(\"a\", \"b\", \"c\"), stringsAsFactors = FALSE)\n  )\n\n  expect_equal(\n    .coerce_to_dataframe(list(var1 = 1:3, var2 = 4:6)),\n    data.frame(var1 = 1:3, var2 = 4:6)\n  )\n})\n\ntest_that(\".coerce_to_dataframe errors correctly if can't coerce\", {\n  expect_error(\n    .coerce_to_dataframe(list(var1 = 1:3, var2 = 4:5)),\n    regexp = \"object that can be coerced\"\n  )\n})\n\ntest_that(\".is_sorted works\", {\n  expect_true(.is_sorted(1:3))\n  expect_true(.is_sorted(c(\"a\", \"b\", \"c\")))\n  expect_true(.is_sorted(factor(c(\"a\", \"b\", \"c\"))))\n\n  expect_false(.is_sorted(c(1, 3, 2)))\n  expect_false(.is_sorted(c(\"b\", \"a\", \"c\")))\n  expect_false(.is_sorted(factor(c(\"b\", \"a\", \"c\"))))\n})\n"
  },
  {
    "path": "tests/testthat/test-utils_cols.R",
    "content": "test_char <- data.frame(\n  a = c(\"iso\", 2, 5),\n  b = c(\"year\", 3, 6),\n  c = c(NA, 5, 7),\n  stringsAsFactors = FALSE\n)\n\ntest_num <- data.frame(\n  a = c(5, 2, 5),\n  b = c(3, 3, 6),\n  c = c(NA, 5, 7)\n)\n\ntest_na <- data.frame(\n  a = c(NA, 2, 5),\n  b = c(NA, 3, 6),\n  c = c(NA, 5, 7)\n)\n\ntest_that(\"row_to_colnames works\", {\n  test <- row_to_colnames(test_char, verbose = FALSE)\n  expect_identical(\n    colnames(test),\n    c(\"iso\", \"year\", \"x1\")\n  )\n\n  test <- row_to_colnames(test_num, verbose = FALSE)\n  expect_identical(\n    colnames(test),\n    c(\"5\", \"3\", \"x1\")\n  )\n\n  test <- row_to_colnames(test_na, verbose = FALSE)\n  expect_identical(\n    colnames(test),\n    c(\"x1\", \"x2\", \"x3\")\n  )\n})\n\ntest_that(\"row_to_colnames: check arg 'row'\", {\n  expect_error(\n    row_to_colnames(test_num, row = \"hi\", verbose = FALSE),\n    regexp = \"Argument `row`\"\n  )\n  expect_error(\n    row_to_colnames(test_num, row = 6),\n    regexp = \"You used row = 6\"\n  )\n  expect_error(\n    row_to_colnames(test_num, row = c(3, 5), verbose = FALSE),\n    regexp = \"Argument `row`\"\n  )\n  expect_identical(\n    row_to_colnames(test_num, verbose = FALSE),\n    row_to_colnames(test_num, row = 1, verbose = FALSE)\n  )\n})\n\ntest_that(\"row_to_colnames: check arg 'na_prefix'\", {\n  test <- row_to_colnames(test_char, na_prefix = \"foo\", verbose = FALSE)\n  expect_identical(\n    colnames(test),\n    c(\"iso\", \"year\", \"foo1\")\n  )\n\n  test <- row_to_colnames(test_num, na_prefix = \"foo\", verbose = FALSE)\n  expect_identical(\n    colnames(test),\n    c(\"5\", \"3\", \"foo1\")\n  )\n})\n\n#-----------------------------------------------------\n\nfoo <- data.frame(\n  ARG = c(\"BRA\", \"FRA\"),\n  `1960` = c(1960, 1960),\n  `2000` = c(2000, 2000),\n  stringsAsFactors = FALSE\n)\n\ntest_that(\"colnames_to_row works\", {\n  test <- colnames_to_row(foo)\n  expect_identical(\n    colnames(test),\n    c(\"x1\", \"x2\", 
\"x3\")\n  )\n  expect_true(\n    all(\n      test[1, 1] == \"ARG\",\n      test[1, 2] == \"X1960\",\n      test[1, 3] == \"X2000\"\n    )\n  )\n  expect_s3_class(test, \"data.frame\")\n})\n\ntest_that(\"colnames_to_row: check arg 'prefix'\", {\n  test <- colnames_to_row(foo, prefix = \"hi\")\n  expect_identical(\n    colnames(test),\n    c(\"hi1\", \"hi2\", \"hi3\")\n  )\n  expect_error(\n    colnames_to_row(test_num, prefix = 6),\n    regexp = \"Argument `prefix`\"\n  )\n  expect_error(\n    colnames_to_row(test_num, prefix = c(\"A\", \"B\")),\n    regexp = \"Argument `prefix`\"\n  )\n  expect_identical(\n    colnames_to_row(test),\n    colnames_to_row(test, prefix = \"x\")\n  )\n})\n"
  },
  {
    "path": "tests/testthat/test-utils_rows.R",
    "content": "test_that(\"rownames_as_column works\", {\n  test <- rownames_as_column(mtcars, \"new_column\")\n  expect_true(\"new_column\" %in% names(test))\n  expect_identical(test[1, \"new_column\"], \"Mazda RX4\")\n})\n\ntest_that(\"rownames_as_column doesn't work if var is not a character\", {\n  expect_error(\n    rownames_as_column(mtcars, var = 1),\n    regexp = \"Argument 'var' must be of type character\"\n  )\n  expect_error(\n    rownames_as_column(mtcars, var = TRUE),\n    regexp = \"Argument 'var' must be of type character\"\n  )\n})\n\ntest_that(\"rownames_as_column uses 'rowname' as default column name\", {\n  test <- rownames_as_column(mtcars, var = NULL)\n  expect_true(\"rowname\" %in% names(test))\n})\n\ntest_that(\"rownames_as_column preserves labels\", {\n  test_data <- mtcars\n  test_data <- assign_labels(test_data, select = \"hp\", variable = \"horsepower\")\n\n  # ungrouped\n  with_id <- rownames_as_column(test_data)\n  expect_identical(\n    attributes(with_id$hp)$label,\n    \"horsepower\"\n  )\n\n  # grouped\n  with_id_grouped <- data_group(test_data, \"cyl\")\n  with_id_grouped <- rownames_as_column(with_id_grouped)\n  expect_identical(\n    attributes(with_id_grouped$hp)$label,\n    \"horsepower\"\n  )\n})\n\ntest_that(\"rownames_as_column preserves other attribs\", {\n  test_data <- standardize(mtcars)\n\n  # ungrouped\n  with_id <- rownames_as_column(test_data)\n  expect_false(is.null(attributes(with_id)$center))\n\n  # grouped\n  with_id_grouped <- data_group(test_data, \"cyl\")\n  with_id_grouped <- rownames_as_column(with_id_grouped)\n  expect_false(is.null(attributes(with_id_grouped)$center))\n})\n\ntest_that(\"rownames_as_column errors if already var of same name\", {\n  expect_error(\n    rownames_as_column(mtcars, \"mpg\"),\n    \"already a variable named\"\n  )\n})\n\n#-------------------------------------------------\n\ntest_that(\"rowid_as_column works\", {\n  test <- rowid_as_column(mtcars, \"new_column\")\n  
expect_true(\"new_column\" %in% names(test))\n  expect_identical(test$new_column, 1:32)\n})\n\ntest_that(\"rowid_as_column works with grouped data\", {\n  test_data <- data_group(iris, \"Species\")\n  test <- rowid_as_column(test_data)\n  expect_identical(test$rowid, rep(1:50, 3))\n  expect_true(\"rowid\" %in% names(test))\n})\n\ntest_that(\"rowid_as_column doesn't work if var is not a character\", {\n  expect_error(\n    rowid_as_column(mtcars, var = 1),\n    regexp = \"Argument 'var' must be of type character\"\n  )\n  expect_error(\n    rowid_as_column(mtcars, var = TRUE),\n    regexp = \"Argument 'var' must be of type character\"\n  )\n})\n\ntest_that(\"rowid_as_column uses 'rowid' as default column name\", {\n  test <- rowid_as_column(mtcars, var = NULL)\n  expect_true(\"rowid\" %in% names(test))\n})\n\ntest_that(\"rowid_as_column preserves labels\", {\n  test_data <- mtcars\n  test_data <- assign_labels(test_data, select = \"hp\", variable = \"horsepower\")\n\n  # ungrouped\n  with_id <- rowid_as_column(test_data)\n  expect_identical(\n    attributes(with_id$hp)$label,\n    \"horsepower\"\n  )\n\n  # grouped\n  with_id_grouped <- data_group(test_data, \"cyl\")\n  with_id_grouped <- rowid_as_column(with_id_grouped)\n  expect_identical(\n    attributes(with_id_grouped$hp)$label,\n    \"horsepower\"\n  )\n})\n\ntest_that(\"rowid_as_column preserves other attribs\", {\n  test_data <- standardize(mtcars)\n\n  # ungrouped\n  with_id <- rowid_as_column(test_data)\n  expect_false(is.null(attributes(with_id)$center))\n\n  # grouped\n  with_id_grouped <- data_group(test_data, \"cyl\")\n  with_id_grouped <- rowid_as_column(with_id_grouped)\n  expect_false(is.null(attributes(with_id_grouped)$center))\n})\n\ntest_that(\"rowid_as_column has no issue if another variable is called 'var'\", {\n  foo <- data.frame(\n    grp = c(\"A\", \"A\", \"B\", \"B\"),\n    var = 1:4,\n    stringsAsFactors = FALSE\n  )\n\n  out <- data_group(foo, grp)\n  out <- rowid_as_column(out)\n  
expect_named(out, c(\"rowid\", \"grp\", \"var\"))\n})\n\ntest_that(\"rowid_as_column errors if already var of same name\", {\n  expect_error(\n    rowid_as_column(mtcars, \"mpg\"),\n    \"already a variable named\"\n  )\n})\n\n#-------------------------------------------------\n\ntest_that(\"column_as_rownames works\", {\n  continents <- c(\n    \"Africa\",\n    \"Asia\",\n    \"Europe\",\n    \"North America\",\n    \"Oceania\",\n    \"South America\"\n  )\n  test <- data.frame(\n    continent = continents,\n    some_value = seq(1, 6, by = 1)\n  )\n  test2 <- column_as_rownames(test, \"continent\")\n  expect_identical(rownames(test2), continents)\n  expect_identical(ncol(test2), 1L)\n\n  test3 <- column_as_rownames(test, 1)\n  expect_identical(rownames(test3), continents)\n  expect_identical(ncol(test3), 1L)\n})\n\ntest_that(\"column_as_rownames sanity checks work\", {\n  continents <- c(\n    \"Africa\",\n    \"Asia\",\n    \"Europe\",\n    \"North America\",\n    \"Oceania\",\n    \"South America\"\n  )\n  test <- data.frame(\n    continent = continents,\n    some_value = seq(1, 6, by = 1)\n  )\n  expect_error(column_as_rownames(test, TRUE), regexp = \"Argument `var`\")\n  expect_error(\n    column_as_rownames(test, \"foo\"),\n    regexp = \"not in the data frame\"\n  )\n  expect_error(column_as_rownames(test, 0), regexp = \"does not exist\")\n  expect_error(column_as_rownames(test, 3), regexp = \"does not exist\")\n})\n\ntest_that(\"rownames_as_column and column_as_rownames cancel each other\", {\n  test <- rownames_as_column(mtcars)\n  test2 <- column_as_rownames(test)\n  expect_identical(test2, mtcars)\n})\n\ntest_that(\"column_as_rownames preserves labels\", {\n  test_data <- rownames_as_column(mtcars)\n  test_data <- assign_labels(test_data, select = \"hp\", variable = \"horsepower\")\n\n  # ungrouped\n  with_id <- column_as_rownames(test_data)\n  expect_identical(\n    attributes(with_id$hp)$label,\n    \"horsepower\"\n  )\n\n  # grouped\n  with_id_grouped 
<- data_group(test_data, \"cyl\")\n  with_id_grouped <- column_as_rownames(with_id_grouped)\n  expect_identical(\n    attributes(with_id_grouped$hp)$label,\n    \"horsepower\"\n  )\n})\n\n\ntest_that(\"column_as_rownames preserves other attribs\", {\n  test_data <- rownames_as_column(standardize(mtcars))\n\n  # ungrouped\n  with_id <- column_as_rownames(test_data, \"rowname\")\n  expect_false(is.null(attributes(with_id)$center))\n\n  # grouped\n  with_id_grouped <- data_group(test_data, \"cyl\")\n  with_id_grouped <- column_as_rownames(with_id_grouped)\n  expect_false(is.null(attributes(with_id_grouped)$center))\n})\n"
  },
  {
    "path": "tests/testthat/test-weighted-stats.R",
    "content": "test_that(\"weighted centrality and dispersion measures work as expected\", {\n  x <- c(3.7, 3.3, 3.5, 2.8)\n  wt <- c(5, 5, 4, 1) / 15\n\n  set.seed(123)\n  expect_equal(weighted_mean(x, wt), 3.453333, tolerance = 0.001)\n  expect_equal(weighted_median(x, wt), 3.5, tolerance = 0.001)\n  expect_equal(weighted_sd(x, wt), 0.2852935, tolerance = 0.001)\n  expect_equal(weighted_mad(x, wt), 0.29652, tolerance = 0.001)\n})\n\ntest_that(\"weighted centrality and dispersion measures work with NA\", {\n  x <- c(3.7, 3.3, NA, 3.5, 2.8, 5.5)\n  wt <- c(5, 5, 4, NA, 1, 7) / 15\n\n  set.seed(123)\n  expect_equal(weighted_mean(x, wt), 4.238889, tolerance = 0.001)\n  expect_equal(weighted_median(x, wt), 3.7, tolerance = 0.001)\n  expect_equal(weighted_sd(x, wt), 1.237671, tolerance = 0.001)\n  expect_equal(weighted_mad(x, wt), 0.59304, tolerance = 0.001)\n})\n\ntest_that(\"weighted centrality and dispersion measures work with NA when not removed\", {\n  x <- c(3.7, 3.3, NA, 3.5, 2.8, 5.5)\n  wt <- c(5, 5, 4, NA, 1, 7) / 15\n\n  set.seed(123)\n  expect_identical(weighted_mean(x, wt, remove_na = FALSE), NA_real_)\n  expect_identical(weighted_median(x, wt, remove_na = FALSE), NA_real_)\n  expect_identical(weighted_sd(x, wt, remove_na = FALSE), NA_real_)\n  expect_identical(weighted_mad(x, wt, remove_na = FALSE), NA_real_)\n})\n\ntest_that(\"weighted centrality and dispersion measures work with Inf\", {\n  x <- c(3.7, 3.3, NA, 3.5, 2.8, 5.5, Inf, 4)\n  wt <- c(5, 5, 4, NA, 1, 7, 3, Inf) / 15\n\n  set.seed(123)\n  expect_equal(weighted_mean(x, wt), 4.238889, tolerance = 0.001)\n  expect_equal(weighted_median(x, wt), 3.7, tolerance = 0.001)\n  expect_equal(weighted_sd(x, wt), 1.237671, tolerance = 0.001)\n  expect_equal(weighted_mad(x, wt), 0.59304, tolerance = 0.001)\n})\n"
  },
  {
    "path": "tests/testthat/test-winsorization.R",
    "content": "test_that(\"testing Winsorization of factors\", {\n  expect_identical(winsorize(as.factor(mtcars$am)), as.factor(mtcars$am))\n})\n\ntest_that(\"with missing values\", {\n  skip_if_not_installed(\"ggplot2\")\n\n  expect_snapshot(suppressWarnings(head(winsorize(na.omit(\n    ggplot2::msleep$brainwt\n  )))))\n  expect_length(winsorize(as.factor(ggplot2::msleep$vore)), 83L)\n})\n\ntest_that(\"winsorize: threshold must be between 0 and 1\", {\n  expect_error(\n    winsorize(sample(1:10, 5), threshold = -0.1),\n    regexp = \"must be a scalar between 0 and 0.5\"\n  )\n  expect_error(\n    winsorize(sample(1:10, 5), threshold = 1.1),\n    regexp = \"must be a scalar between 0 and 0.5\"\n  )\n  expect_error(\n    winsorize(sample(1:10, 5), method = \"zscore\", threshold = -3),\n    regexp = \"must be a scalar greater than 0\"\n  )\n  expect_error(\n    winsorize(\n      sample(1:10, 5),\n      method = \"zscore\",\n      threshold = -3,\n      robust = TRUE\n    ),\n    regexp = \"must be a scalar greater than 0\"\n  )\n  expect_error(\n    winsorize(sample(1:10, 5), method = \"raw\", threshold = 1.1),\n    regexp = \"must be of length 2 for lower and upper bound\"\n  )\n})\n\ntest_that(\"winsorize on data.frame\", {\n  iris2 <- winsorize(iris)\n  expect_identical(\n    iris2$Sepal.Length,\n    winsorize(iris$Sepal.Length)\n  )\n  expect_identical(\n    iris2$Petal.Width,\n    winsorize(iris$Petal.Width)\n  )\n  expect_named(iris2, names(iris))\n})\n"
  },
  {
    "path": "tests/testthat.R",
    "content": "library(testthat)\nlibrary(datawizard)\n\ntest_check(\"datawizard\")\n"
  },
  {
    "path": "vignettes/.gitignore",
    "content": "*.html\n*.R\n"
  },
  {
    "path": "vignettes/bibliography.bib",
    "content": "@Manual{revelle2018,\n  title = {psych: Procedures for Psychological, Psychometric, and Personality Research},\n  author = {William Revelle},\n  organization = { Northwestern University},\n  address = { Evanston, Illinois},\n  year = {2018},\n  note = {R package version 1.8.12},\n  url = {https://CRAN.R-project.org/package=psych}\n}\n\n\n@article{makowski2018psycho,\n  title={The psycho package: An efficient and publishing-oriented workflow for psychological science},\n  author={Makowski, Dominique},\n  journal={Journal of Open Source Software},\n  volume={3},\n  number={22},\n  pages={470},\n  year={2018}\n}\n\n\n@article{menard2011standards,\n  title={Standards for standardized logistic regression coefficients},\n  author={Menard, Scott},\n  journal={Social Forces},\n  volume={89},\n  number={4},\n  pages={1409--1428},\n  year={2011},\n  publisher={The University of North Carolina Press}\n}\n\n\n@article{schielzeth2010simple,\n  title={Simple means to improve the interpretability of regression coefficients},\n  author={Schielzeth, Holger},\n  journal={Methods in Ecology and Evolution},\n  volume={1},\n  number={2},\n  pages={103--113},\n  year={2010},\n  publisher={Wiley Online Library}\n}\n\n\n@article{gelman2008scaling,\n  title={Scaling regression inputs by dividing by two standard deviations},\n  author={Gelman, Andrew},\n  journal={Statistics in medicine},\n  volume={27},\n  number={15},\n  pages={2865--2873},\n  year={2008},\n  publisher={Wiley Online Library}\n}\n\n\n@article{menard2004six,\n  title={Six approaches to calculating standardized logistic regression coefficients},\n  author={Menard, Scott},\n  journal={The American Statistician},\n  volume={58},\n  number={3},\n  pages={218--223},\n  year={2004},\n  publisher={Taylor \\& Francis}\n}\n\n\n@article{bring1994standardize,\n  title={How to standardize regression coefficients},\n  author={Bring, Johan},\n  journal={The American Statistician},\n  volume={48},\n  number={3},\n  
pages={209--213},\n  year={1994},\n  publisher={Taylor \\& Francis}\n}\n\n\n@article{neter1989applied,\n  title={Applied linear regression models},\n  author={Neter, John and Wasserman, William and Kutner, Michael H},\n  year={1989},\n  publisher={Irwin Homewood, IL}\n}\n\n\n@book{gelman_data_2007,\n\taddress = {Cambridge; New York},\n\tseries = {Analytical methods for social research},\n\ttitle = {Data analysis using regression and multilevel/hierarchical models},\n\tisbn = {978-0-521-86706-1 978-0-521-68689-1},\n\tpublisher = {Cambridge University Press},\n\tauthor = {Gelman, Andrew and Hill, Jennifer},\n\tyear = {2007}\n}\n\n\n@inproceedings{bafumi_fitting_2006,\n\taddress = {Philadelphia, PA},\n\ttitle = {Fitting Multilevel Models When Predictors and Group Effects Correlate.},\n\tauthor = {Bafumi, Joseph and Gelman, Andrew},\n\tyear = {2006}\n}\n\n\n@article{bell_fixed_2019,\n\ttitle = {Fixed and random effects models: making an informed choice},\n\tissn = {1573-7845},\n\turl = {https://doi.org/10.1007/s11135-018-0802-x},\n\tdoi = {10.1007/s11135-018-0802-x},\n\tjournal = {Quality \\& Quantity},\n\tauthor = {Bell, Andrew and Fairbrother, Malcolm and Jones, Kelvyn},\n\tvolume = {53},\n\tyear = {2019},\n\tpages = {1051--1074}\n}\n\n\n@article{bell_explaining_2015,\n\ttitle = {Explaining Fixed Effects: Random Effects Modeling of Time-Series Cross-Sectional and Panel Data},\n\tvolume = {3},\n\tissn = {2049-8470, 2049-8489},\n\turl = {https://www.cambridge.org/core/product/identifier/S2049847014000077/type/journal_article},\n\tdoi = {10.1017/psrm.2014.7},\n\tnumber = {1},\n\tjournal = {Political Science Research and Methods},\n\tauthor = {Bell, Andrew and Jones, Kelvyn},\n\tmonth = jan,\n\tyear = {2015},\n\tpages = {133--153}\n}\n\n\n\n@article{heisig_costs_2017,\n\ttitle = {The Costs of Simplicity: Why Multilevel Models May Benefit from Accounting for Cross-Cluster Differences in the Effects of Controls},\n\tvolume = {82},\n\tissn = {0003-1224, 1939-8271},\n\turl = 
{http://journals.sagepub.com/doi/10.1177/0003122417717901},\n\tdoi = {10.1177/0003122417717901},\n\tnumber = {4},\n\tjournal = {American Sociological Review},\n\tauthor = {Heisig, Jan Paul and Schaeffer, Merlin and Giesecke, Johannes},\n\tmonth = aug,\n\tyear = {2017},\n\tpages = {796--827}\n}\n\n\n\n@article{shor_bayesian_2007,\n\ttitle = {A Bayesian Multilevel Modeling Approach to Time-Series Cross-Sectional Data},\n\tvolume = {15},\n\tissn = {1047-1987, 1476-4989},\n\tdoi = {10.1093/pan/mpm006},\n\tnumber = {2},\n\tjournal = {Political Analysis},\n\tauthor = {Shor, Boris and Bafumi, Joseph and Keele, Luke and Park, David},\n\tyear = {2007},\n\tpages = {165--181}\n}\n\n\n\n\n@article{mundlak_pooling_1978,\n\ttitle = {On the Pooling of Time Series and Cross Section Data},\n\tvolume = {46},\n\tnumber = {1},\n\tjournal = {Econometrica},\n\tauthor = {Mundlak, Yair},\n\tmonth = jan,\n\tyear = {1978},\n\tpages = {69}\n}\n"
  },
  {
    "path": "vignettes/overview_of_vignettes.Rmd",
    "content": "---\ntitle: \"Overview of Vignettes\"\noutput: rmarkdown::html_vignette\nvignette: >\n  %\\VignetteIndexEntry{Overview of Vignettes}\n  %\\VignetteEngine{knitr::rmarkdown}\n  %\\VignetteEncoding{UTF-8}\n---\n\n```{r message=FALSE, warning=FALSE, include=FALSE}\nlibrary(knitr)\nknitr::opts_chunk$set(\n  echo = TRUE,\n  collapse = TRUE,\n  warning = FALSE,\n  message = FALSE,\n  comment = \"#>\",\n  eval = TRUE\n)\n```\n\nAll package vignettes are available at [https://easystats.github.io/datawizard/](https://easystats.github.io/datawizard/).\n\n## Function Overview\n\n* [Function Reference](https://easystats.github.io/datawizard/reference/index.html)\n\n\n## Data Preparation\n\n* [Coming from 'tidyverse'](https://easystats.github.io/datawizard/articles/tidyverse_translation.html)\n* [A quick summary of selection syntax in `{datawizard}`](https://easystats.github.io/datawizard/articles/selection_syntax.html)\n\n\n## Statistical Transformations\n\n* [Data Standardization](https://easystats.github.io/datawizard/articles/standardize_data.html)\n"
  },
  {
    "path": "vignettes/selection_syntax.Rmd",
    "content": "---\ntitle: \"A quick summary of selection syntax in `{datawizard}`\"\noutput: rmarkdown::html_vignette\nvignette: >\n  %\\VignetteIndexEntry{A quick summary of selection syntax in `{datawizard}`}\n  %\\VignetteEngine{knitr::rmarkdown}\n  %\\VignetteEncoding{UTF-8}\n---\n\n```{r setup, include = FALSE}\nknitr::opts_chunk$set(\n  collapse = TRUE,\n  comment = \"#>\"\n)\n\npkgs <- c(\n  \"datawizard\",\n  \"dplyr\"\n)\n\nif (!all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)))) {\n  knitr::opts_chunk$set(eval = FALSE)\n}\n```\n\n```{r load, echo=FALSE, message=FALSE}\nlibrary(datawizard)\nlibrary(dplyr)\n\nset.seed(123)\niris <- iris[sample(nrow(iris), 10), ]\nrow.names(iris) <- NULL\n```\n\n```{css, echo=FALSE}\n.custom_note {\n  border-left: solid 5px hsl(220, 100%, 30%);\n  background-color: hsl(220, 100%, 95%);\n  padding: 5px;\n  margin-bottom: 10px\n}\n```\n\nThis vignette can be referred to by citing the following:\n\nPatil et al., (2022). datawizard: An R Package for Easy Data Preparation and Statistical Transformations. *Journal of Open Source Software*, *7*(78), 4684, https://doi.org/10.21105/joss.04684\n\n# Selecting variables\n\n## Quoted names\n\nThis is the most simple way to select one or several variables. Just use a character\nvector containing variables names, like in base R.\n\n```{r}\ndata_select(iris, c(\"Sepal.Length\", \"Petal.Width\"))\n```\n\n## Unquoted names\n\nIt is also possible to use unquoted names. This is useful if we use the `tidyverse`\nand want to be consistent about the way variable names are passed.\n\n```{r}\niris %>%\n  group_by(Species) %>%\n  standardise(Petal.Length) %>%\n  ungroup()\n```\n\n\n## Positions\n\nIn addition to variable names, `select` can also take indices for the variables \nto select in the dataframe.\n\n```{r}\ndata_select(iris, c(1, 2, 5))\n```\n\n\n## Functions\n\nWe can also pass a function to the `select` argument. 
This function will be applied\nto all columns and should return `TRUE` or `FALSE`. For example, if we want to \nkeep only numeric columns, we can use `is.numeric`.\n\n```{r}\ndata_select(iris, is.numeric)\n```\n\nNote that we can provide any custom function to `select`, *provided it returns `TRUE` or `FALSE`* when applied to a column.\n\n```{r}\nmy_function <- function(i) {\n  is.numeric(i) && mean(i, na.rm = TRUE) > 3.5\n}\n\ndata_select(iris, my_function)\n```\n\n\n## Patterns\n\nWith larger datasets, it would be tedious to write the names of variables to select,\nand it would be fragile to rely on variable positions as they may change later. \nTo this end, we can use four select helpers: `starts_with()`, `ends_with()`,\n`contains()`, and `regex()`. The first three can take several patterns, while\n`regex()` takes a single regular expression.\n \n```{r}\ndata_select(iris, starts_with(\"Sep\", \"Peta\"))\n\ndata_select(iris, ends_with(\"dth\", \"ies\"))\n\ndata_select(iris, contains(\"pal\", \"ec\"))\n\ndata_select(iris, regex(\"^Sep|ies\"))\n```\n\n\n<!-- NOTE: use raw HTML so that vignette can compile even if `evaluate_chunk` is FALSE. -->\n<!-- See e.g. #527 -->\n\n<div class=\"custom_note\">\n  <p>\n    Note: these functions are not exported by `datawizard` but are detected and\n    applied internally. This means that they won't be detected by autocompletion\n    when we write them.\n  </p>\n</div>\n\n<div class=\"custom_note\">\n  <p>\n    Note #2: because these functions are not exported, they will not create\n    conflicts with the ones that come from the `tidyverse` and that have the same \n    name. Therefore, we can still use `dplyr` and its friends, it won't change \n    anything for selection in `datawizard` functions!\n  </p>\n</div>\n\n\n# Excluding variables\n\nWhat if we want to keep all variables except for a few ones? 
There are two ways\nwe can invert our selection.\n\nThe first way is to put a minus sign `\"-\"` in front of the `select` argument.\n\n```{r}\ndata_select(iris, -c(\"Sepal.Length\", \"Petal.Width\"))\n\ndata_select(iris, -starts_with(\"Sep\", \"Peta\"))\n\ndata_select(iris, -is.numeric)\n```\n\nNote that if we use numeric indices, we can't mix negative and positive values. \nThis means that we have to use `select = -(1:2)` if we want to exclude the first\ntwo columns; `select = -1:2` will *not* work:\n\n```{r}\ndata_select(iris, -(1:2))\n```\n\nSame thing for variable names:\n\n```{r}\ndata_select(iris, -(Petal.Length:Species))\n```\n\nThe second way is to use the argument `exclude`. This argument has the same \npossibilities as `select`. Although this may not be required in most contexts, \nif we wanted to, we could use both `select` and `exclude` arguments at the same \ntime.\n\n```{r}\ndata_select(iris, exclude = c(\"Sepal.Length\", \"Petal.Width\"))\n\ndata_select(iris, exclude = starts_with(\"Sep\", \"Peta\"))\n```\n\n# Programming with selections\n\nSince `datawizard` 0.6.0, it is possible to pass function arguments and loop indices\nin `select` and `exclude` arguments. 
This makes it easier to program with\n`datawizard`.\n\nFor example, if we want to let the user decide the selection they want to use:\n\n```{r}\nmy_function <- function(data, selection) {\n  extract_column_names(data, select = selection)\n}\nmy_function(iris, \"Sepal.Length\")\nmy_function(iris, starts_with(\"Sep\"))\n\nmy_function_2 <- function(data, pattern) {\n  extract_column_names(data, select = starts_with(pattern))\n}\nmy_function_2(iris, \"Sep\")\n```\n\nIt is also possible to pass these values in loops, for example if we have a list \nof patterns and we want to relocate columns based on these patterns, one by one:\n\n```{r}\nnew_iris <- iris\nfor (i in c(\"Sep\", \"Pet\")) {\n  new_iris <- new_iris %>%\n    data_relocate(select = starts_with(i), after = -1)\n}\nnew_iris\n```\n\nIn the loop above, all columns starting with `\"Sep\"` are moved at the end of the\ndata frame, and the same thing was made with all columns starting with `\"Pet\"`.\n\n\n\n# Useful to know\n\n## Ignore the case\n\nIn every selection that uses variable names, we can ignore the case in the \nselection by applying `ignore_case = TRUE`.\n\n```{r}\ndata_select(iris, c(\"sepal.length\", \"petal.width\"), ignore_case = TRUE)\n\ndata_select(iris, ~ Sepal.length + petal.Width, ignore_case = TRUE)\n\ndata_select(iris, starts_with(\"sep\", \"peta\"), ignore_case = TRUE)\n```\n\n## Formulas\n\nIt is also possible to use formulas to select variables:\n\n```{r}\ndata_select(iris, ~ Sepal.Length + Petal.Width)\n```\n\nThis made it easier to use selection in custom functions before `datawizard` \n0.6.0, and is kept available for backward compatibility. \n\n"
  },
  {
    "path": "vignettes/standardize_data.Rmd",
    "content": "---\ntitle: \"Data Standardization\"\noutput: \n  rmarkdown::html_vignette:\n    toc: true\n    fig_width: 10.08\n    fig_height: 6\nvignette: >\n  \\usepackage[utf8]{inputenc}\n  %\\VignetteIndexEntry{Data Standardization}\n  %\\VignetteEngine{knitr::rmarkdown}\n---\n\n```{r message=FALSE, warning=FALSE, include=FALSE}\noptions(knitr.kable.NA = \"\")\nknitr::opts_chunk$set(\n  comment = \"#>\",\n  message = FALSE,\n  warning = FALSE,\n  dpi = 300\n)\n\npkgs <- c(\n  \"datawizard\",\n  \"poorman\",\n  \"see\",\n  \"ggplot2\",\n  \"parameters\",\n  \"lme4\",\n  \"curl\"\n)\npkg_available <- all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)))\n\nif (pkg_available) {\n  net_available <- curl::has_internet()\n} else {\n  net_available <- FALSE\n}\n\nif (!pkg_available || !net_available) {\n  knitr::opts_chunk$set(eval = FALSE)\n}\n```\n\nThis vignette can be referred to by citing the following:\n\n> Patil et al., (2022). datawizard: An R Package for Easy Data Preparation and Statistical Transformations. *Journal of Open Source Software*, *7*(78), 4684, https://doi.org/10.21105/joss.04684\n\n# Introduction\n\nTo make sense of their data and effects, scientists might want to standardize\n(Z-score) their variables. 
This makes the data unitless, expressed only in terms\nof deviation from an index of centrality (e.g., the mean or the median).\nHowever, aside from some benefits, standardization also comes with challenges\nand issues, that the scientist should be aware of.\n\n## Methods of Standardization\n\nThe `datawizard` package offers two methods of standardization via the\n`standardize()` function:\n\n- **Normal standardization**: center around the *mean*, with *SD* units\n  (default).\n\n- **Robust standardization**: center around the *median*, with *MAD* (median\n  absolute deviation) units (`robust = TRUE`).\n\nLet's look at the following example:\n\n```{r}\nlibrary(datawizard)\nlibrary(effectsize) # for data\n\n# let's have a look at what the data look like\ndata(\"hardlyworking\", package = \"effectsize\")\nhead(hardlyworking)\n\n# let's use both methods of standardization\nhardlyworking$xtra_hours_z <- standardize(hardlyworking$xtra_hours)\nhardlyworking$xtra_hours_zr <- standardize(hardlyworking$xtra_hours, robust = TRUE)\n```\n\nWe can see that different methods give different central and variation values:\n\n```{r, eval=FALSE}\nlibrary(dplyr)\n\nhardlyworking %>%\n  select(starts_with(\"xtra_hours\")) %>%\n  data_to_long() %>%\n  group_by(Name) %>%\n  summarise(\n    mean = mean(Value),\n    sd = sd(Value),\n    median = median(Value),\n    mad = mad(Value)\n  )\n```\n\n```{r, echo=FALSE}\nlibrary(poorman)\n\nhardlyworking %>%\n  select(starts_with(\"xtra_hours\")) %>%\n  reshape_longer(names_to = \"name\", values_to = \"value\") %>%\n  group_by(name) %>%\n  summarise(\n    mean = mean(value),\n    sd = sd(value),\n    median = median(value),\n    mad = mad(value)\n  ) %>%\n  knitr::kable(digits = 4)\n```\n\n`standardize()` can also be used to standardize a full data frame - where each\nnumeric variable is standardized separately:\n\n```{r}\nhardlyworking_z <- standardize(hardlyworking)\n```\n\n```{r, eval=FALSE}\nhardlyworking_z %>%\n  select(-xtra_hours_z, 
-xtra_hours_zr) %>%\n  data_to_long() %>%\n  group_by(Name) %>%\n  summarise(\n    mean = mean(Value),\n    sd = sd(Value),\n    median = median(Value),\n    mad = mad(Value)\n  )\n```\n\n```{r, echo=FALSE}\nhardlyworking_z %>%\n  select(-xtra_hours_z, -xtra_hours_zr) %>%\n  reshape_longer(names_to = \"name\", values_to = \"value\") %>%\n  group_by(name) %>%\n  summarise(\n    mean = mean(value),\n    sd = sd(value),\n    median = median(value),\n    mad = mad(value)\n  ) %>%\n  knitr::kable(digits = 4)\n```\n\nWeighted standardization is also supported via the `weights` argument, and\nfactors can also be standardized (if you're into that kind of thing) by setting\n`force = TRUE`, which converts factors to treatment-coded dummy variables before\nstandardizing.\n\n## Variable-wise *vs.* Participant-wise \n\nStandardization is an important step and extra caution is required in\n**repeated-measures designs**, in which there are three ways of standardizing\ndata:\n\n- **Variable-wise**: The most common method. A simple scaling of each column.\n\n- **Participant-wise**: Variables are standardized \"within\" each participant,\n  *i.e.*, for each participant, by the participant's mean and SD.\n\n- **Full**: Participant-wise first and then re-standardizing variable-wise.\n\nUnfortunately, the method used is often not explicitly stated. This is an issue\nas these methods can generate important discrepancies (that can in turn\ncontribute to the reproducibility crisis). Let's investigate these 3 methods.\n\n### The Data\n\nWe will take the `emotion` dataset in which participants were exposed to\nnegative pictures and had to rate their emotions (**valence**) and the amount of\nmemories associated with the picture (**autobiographical link**). 
One could make\nthe hypothesis that for young participants with no context of war or violence,\nthe most negative pictures (mutilations) are less related to memories than less\nnegative pictures (involving for example car crashes or sick people). In other\nwords, **we expect a positive relationship between valence** (with high values\ncorresponding to less negativity) **and autobiographical link**.\n\nLet's have a look at the data, averaged by participants:\n\n```{r, eval=FALSE}\n# Download the 'emotion' dataset\nload(url(\"https://raw.githubusercontent.com/neuropsychology/psycho.R/master/data/emotion.rda\"))\n\n# Discard neutral pictures (keep only negative)\nemotion <- emotion %>% filter(Emotion_Condition == \"Negative\")\n\n# Summary\nemotion %>%\n  drop_na(Subjective_Valence, Autobiographical_Link) %>%\n  group_by(Participant_ID) %>%\n  summarise(\n    n_Trials = n(),\n    Valence_Mean = mean(Subjective_Valence),\n    Valence_SD = sd(Subjective_Valence)\n  )\n```\n\n```{r, echo=FALSE}\nload(url(\"https://raw.githubusercontent.com/neuropsychology/psycho.R/master/data/emotion.rda\"))\n\n# Discard neutral pictures (keep only negative)\nemotion <- emotion %>% filter(Emotion_Condition == \"Negative\")\n\n# Summary\nemotion %>%\n  subset(!(is.na(Subjective_Valence) | is.na(Autobiographical_Link))) %>%\n  group_by(Participant_ID) %>%\n  summarise(\n    n_Trials = n(),\n    Valence_Mean = mean(Subjective_Valence),\n    Valence_SD = sd(Subjective_Valence)\n  )\n```\n\nAs we can see from the means and SDs, there is a lot of variability **between**\nparticipants both in their means and their individual *within*-participant SD.\n\n### Effect of Standardization \n\nWe will create three data frames standardized with each of the three\ntechniques.\n\n```{r, warning=FALSE}\nZ_VariableWise <- emotion %>%\n  standardize()\n\nZ_ParticipantWise <- emotion %>%\n  group_by(Participant_ID) %>%\n  standardize()\n\nZ_Full <- emotion %>%\n  group_by(Participant_ID) %>%\n  standardize() 
%>%\n  ungroup() %>%\n  standardize()\n```\n\nLet's see how these three standardization techniques affected the **Valence**\nvariable.\n\n### Across Participants\n\nWe can calculate the mean and SD of *Valence* across all participants:\n\n```{r, eval=FALSE}\n# Create a convenient function to print\nsummarise_Subjective_Valence <- function(data) {\n  df_name <- deparse(substitute(data))\n  data %>%\n    ungroup() %>%\n    summarise(\n      DF = df_name,\n      Mean = mean(Subjective_Valence),\n      SD = sd(Subjective_Valence)\n    )\n}\n# Check the results\nrbind(\n  summarise_Subjective_Valence(Z_VariableWise),\n  summarise_Subjective_Valence(Z_ParticipantWise),\n  summarise_Subjective_Valence(Z_Full)\n)\n```\n\n```{r, echo=FALSE}\n# Create a convenient function to print\nsummarise_Subjective_Valence <- function(data) {\n  df_name <- deparse(substitute(data))\n  data <- data %>%\n    ungroup() %>%\n    summarise(\n      Mean = mean(Subjective_Valence),\n      SD = sd(Subjective_Valence)\n    )\n  cbind(DF = df_name, data)\n}\n# Check the results\nrbind(\n  summarise_Subjective_Valence(Z_VariableWise),\n  summarise_Subjective_Valence(Z_ParticipantWise),\n  summarise_Subjective_Valence(Z_Full)\n) %>%\n  knitr::kable(digits = 2)\n```\n\n\nThe **means** and the **SD** appear as fairly similar (0 and 1)...\n\n```{r, fig.width=7, fig.height=4.5, results='markup', fig.align='center'}\nlibrary(see)\nlibrary(ggplot2)\n\nggplot() +\n  geom_density(aes(Z_VariableWise$Subjective_Valence,\n    color = \"Z_VariableWise\"\n  ), linewidth = 1) +\n  geom_density(aes(Z_ParticipantWise$Subjective_Valence,\n    color = \"Z_ParticipantWise\"\n  ), linewidth = 1) +\n  geom_density(aes(Z_Full$Subjective_Valence,\n    color = \"Z_Full\"\n  ), linewidth = 1) +\n  see::theme_modern() +\n  labs(color = \"\")\n```\n\nand so do the marginal distributions...\n\n### At the Participant Level\n\nHowever, we can also look at what happens in the participant level. 
Let's look at\nthe first 5 participants:\n\n```{r, eval=FALSE}\n# Create convenient function\nprint_participants <- function(data) {\n  df_name <- deparse(substitute(data))\n  data %>%\n    group_by(Participant_ID) %>%\n    summarise(\n      DF = df_name,\n      Mean = mean(Subjective_Valence),\n      SD = sd(Subjective_Valence)\n    ) %>%\n    head(5) %>%\n    select(DF, everything())\n}\n\n# Check the results\nrbind(\n  print_participants(Z_VariableWise),\n  print_participants(Z_ParticipantWise),\n  print_participants(Z_Full)\n)\n```\n\n\n```{r, echo=FALSE}\n# Create convenient function\nprint_participants <- function(data) {\n  df_name <- deparse(substitute(data))\n  data %>%\n    group_by(Participant_ID) %>%\n    summarise(\n      Mean = mean(Subjective_Valence),\n      SD = sd(Subjective_Valence)\n    ) %>%\n    cbind(DF = df_name, .) %>%\n    head(5) %>%\n    select(DF, everything())\n}\n\n# Check the results\nrbind(\n  print_participants(Z_VariableWise),\n  print_participants(Z_ParticipantWise),\n  print_participants(Z_Full)\n) %>%\n  knitr::kable(digits = 2)\n```\n\nSeems like *full* and *participant-wise* standardization give similar results,\nbut different ones than *variable-wise* standardization.\n\n### Compare\n\nLet's do a **correlation** between the **variable-wise and participant-wise\nmethods**.\n\n```{r, fig.width=7, fig.height=4.5, results='markup', fig.align='center'}\nr <- cor.test(\n  Z_VariableWise$Subjective_Valence,\n  Z_ParticipantWise$Subjective_Valence\n)\n\ndata.frame(\n  Original = emotion$Subjective_Valence,\n  VariableWise = Z_VariableWise$Subjective_Valence,\n  ParticipantWise = Z_ParticipantWise$Subjective_Valence\n) %>%\n  ggplot(aes(x = VariableWise, y = ParticipantWise, colour = Original)) +\n  geom_point(alpha = 0.75, shape = 16) +\n  geom_smooth(method = \"lm\", color = \"black\") +\n  scale_color_distiller(palette = 1) +\n  ggtitle(paste0(\"r = \", round(r$estimate, 2))) +\n  see::theme_modern()\n```\n\nWhile the three 
standardization methods roughly present the same characteristics\nat a general level (mean 0 and SD 1) and a similar distribution, their values\nare not exactly the same!\n\nLet's now answer the original question by investigating the **linear relationship between valence and autobiographical link**. We can do this by\nrunning a mixed-effects model with participants entered as random effects.\n\n```{r}\nlibrary(lme4)\nm_raw <- lmer(\n  formula = Subjective_Valence ~ Autobiographical_Link + (1 | Participant_ID),\n  data = emotion\n)\nm_VariableWise <- update(m_raw, data = Z_VariableWise)\nm_ParticipantWise <- update(m_raw, data = Z_ParticipantWise)\nm_Full <- update(m_raw, data = Z_Full)\n```\n\nWe can extract the parameters of interest from each model, and find:\n\n```{r}\n# Convenient function\nget_par <- function(model) {\n  mod_name <- deparse(substitute(model))\n  parameters::model_parameters(model) %>%\n    mutate(Model = mod_name) %>%\n    select(-Parameter) %>%\n    select(Model, everything()) %>%\n    .[-1, ]\n}\n\n# Run the model on all datasets\nrbind(\n  get_par(m_raw),\n  get_par(m_VariableWise),\n  get_par(m_ParticipantWise),\n  get_par(m_Full)\n)\n```\n\nAs we can see, **variable-wise** standardization only affects **the coefficient** (which is expected, as it changes the unit), but not the test\nstatistic or statistical significance. However, using **participant-wise**\nstandardization *does* affect the coefficient **and** the significance. \n\n**No method is better or more justified, and the choice depends on the specific case, context, data and goal.**\n\n### Conclusion\n\n1. **Standardization can be useful in *some* cases and should be justified**.\n\n2. **Variable and Participant-wise standardization methods *appear* to produce similar data**.\n\n3. **Variable and Participant-wise standardization can lead to different results**.\n\n4. 
**The chosen method can strongly influence the results and should therefore be explicitly stated and justified to enhance reproducibility of results**.\n\nWe showed here yet another way of **sneakily tweaking the data** that can change\nthe results. To prevent its use as a bad practice, we can only highlight the\nimportance of open data, open analysis/scripts, and preregistration.\n\n# See also \n\n- `datawizard::demean()`: <https://easystats.github.io/datawizard/reference/demean.html>\n- `standardize_parameters(method = \"pseudo\")` for mixed-effects models\n<https://easystats.github.io/parameters/articles/standardize_parameters_effsize.html>\n\n# References\n"
  },
  {
    "path": "vignettes/tidyverse_translation.Rmd",
    "content": "---\ntitle: \"Coming from 'tidyverse'\"\noutput:\n  rmarkdown::html_vignette:\n    toc: true\nvignette: >\n  \\usepackage[utf8]{inputenc}\n  %\\VignetteIndexEntry{Coming from 'tidyverse'}\n  %\\VignetteEngine{knitr::rmarkdown}\n---\n\n```{r setup, message=FALSE, warning=FALSE, include=FALSE, eval = TRUE}\nlibrary(knitr)\noptions(knitr.kable.NA = \"\")\nknitr::opts_chunk$set(\n  eval = FALSE,\n  message = FALSE,\n  warning = FALSE,\n  dpi = 300\n)\n\npkgs <- c(\n  \"dplyr\",\n  \"tidyr\"\n)\nall_deps_available <- all(vapply(pkgs, requireNamespace, quietly = TRUE, FUN.VALUE = logical(1L)))\n\nif (all_deps_available) {\n  library(datawizard)\n  library(dplyr)\n  library(tidyr)\n}\n\n# Since we explicitly put `eval = TRUE` for some chunks, we can't rely on\n# `knitr::opts_chunk$set(eval = FALSE)` at the beginning of the script.\n# Therefore, we introduce a logical that is `FALSE` only if all suggested\n# dependencies are not installed (cf easystats/easystats#317)\nevaluate_chunk <- all_deps_available && getRversion() >= \"4.1.0\"\n```\n\nThis vignette can be referred to by citing the following:\n\nPatil et al., (2022). datawizard: An R Package for Easy Data Preparation and Statistical Transformations. *Journal of Open Source Software*, *7*(78), 4684, https://doi.org/10.21105/joss.04684\n\n```{css, echo=FALSE, eval = TRUE}\n.datawizard, .datawizard > .sourceCode {\n  background-color: #e6e6ff;\n}\n.tidyverse, .tidyverse > .sourceCode {\n  background-color: #d9f2e5;\n}\n.custom_note {\n  border-left: solid 5px hsl(220, 100%, 30%);\n  background-color: hsl(220, 100%, 95%);\n  padding: 5px;\n  margin-bottom: 10px\n}\n```\n\n# Introduction\n\n`{datawizard}` package aims to make basic data wrangling easier than\nwith base R. The data wrangling workflow it supports is similar to the one\nsupported by the tidyverse package combination of `{dplyr}` and `{tidyr}`. 
However,\none of its main features is that it has very few dependencies: `{stats}` and `{utils}`\n(included in base R) and `{insight}`, which is the core package of the _easystats_\necosystem. This package grew organically to simultaneously satisfy the\n\"0 non-base hard dependency\" principle of _easystats_ and the data wrangling needs\nof the constituent packages in this ecosystem. It is also\nimportant to note that `{datawizard}` was designed to avoid namespace collisions\nwith `{tidyverse}` packages.\n\nIn this article, we will see how to go through basic data wrangling steps with\n`{datawizard}`. We will also compare it to the `{tidyverse}` syntax for achieving the same.\nThis way, if you decide to make the switch, you can easily find the translations here.\nThis vignette is largely inspired by `{dplyr}`'s [Getting started vignette](https://dplyr.tidyverse.org/articles/dplyr.html).\n\n<!-- NOTE: use raw HTML so that vignette can compile even if `evaluate_chunk` is FALSE. -->\n<!-- See e.g. #527 -->\n\n<div class=\"custom_note\">\n  <p>\n    Note: In this vignette, we use the native pipe-operator, `|>`, which was\n    introduced in R 4.1. 
Users of R version 3.6 or 4.0 should replace the native\n    pipe by magrittr's one (`%>%`) so that examples work.\n  </p>\n</div>\n\n```{r, eval = evaluate_chunk}\nlibrary(dplyr)\nlibrary(tidyr)\nlibrary(datawizard)\n\ndata(efc)\nefc <- head(efc)\n```\n\n# Workhorses\n\nBefore we look at their *tidyverse* equivalents, we can first have a look at\n`{datawizard}`'s key functions for data wrangling:\n\n| Function          | Operation                                                        |\n| :---------------- | :--------------------------------------------------------------- |\n| `data_filter()`   | [to select only certain observations](#filtering)                |\n| `data_select()`   | [to select only a few variables](#selecting)                     |\n| `data_modify()`   | [to create variables or modify existing ones](#modifying)        |\n| `data_arrange()`  | [to sort observations](#sorting)                                 |\n| `data_extract()`  | [to extract a single variable](#extracting)                      |\n| `data_rename()`   | [to rename variables](#renaming)                                 |\n| `data_relocate()` | [to reorder a data frame](#relocating)                           |\n| `data_to_long()`  | [to convert data from wide to long](#reshaping)                  |\n| `data_to_wide()`  | [to convert data from long to wide](#reshaping)                  |\n| `data_join()`     | [to join two data frames](#joining)                              |\n| `data_unite()`    | [to concatenate several columns into a single one](#uniting)     |\n| `data_separate()` | [to separate a single column into multiple columns](#separating) |\n\nNote that there are a few functions in `{datawizard}` that have no strict equivalent\nin `{dplyr}` or `{tidyr}` (e.g `data_rotate()`), and so we won't discuss them in\nthe next section.\n\n# Equivalence with `{dplyr}` / `{tidyr}`\n\nBefore we look at them individually, let's first have a look at the summary table of this 
equivalence.\n\n| Function          | Tidyverse equivalent(s)                                             |\n| :---------------- | :------------------------------------------------------------------ |\n| `data_filter()`   | `dplyr::filter()`, `dplyr::slice()`                                 |\n| `data_select()`   | `dplyr::select()`                                                   |\n| `data_modify()`   | `dplyr::mutate()`                                                   |\n| `data_arrange()`  | `dplyr::arrange()`                                                  |\n| `data_extract()`  | `dplyr::pull()`                                                     |\n| `data_rename()`   | `dplyr::rename()`                                                   |\n| `data_relocate()` | `dplyr::relocate()`                                                 |\n| `data_to_long()`  | `tidyr::pivot_longer()`                                             |\n| `data_to_wide()`  | `tidyr::pivot_wider()`                                              |\n| `data_join()`     | `dplyr::inner_join()`, `dplyr::left_join()`, `dplyr::right_join()`, |\n|                   | `dplyr::full_join()`, `dplyr::anti_join()`, `dplyr::semi_join()`    |\n| `data_peek()`     | `dplyr::glimpse()`                                                  |\n| `data_unite()`    | `tidyr::unite()`                                                    |\n| `data_separate()` | `tidyr::separate()`                                                 |\n\n## Filtering {#filtering}\n\n`data_filter()` is a wrapper around `subset()`. 
However, if you want to have several filtering conditions, you can either use `&` (as in `subset()`) or `,` (as in `dplyr::filter()`).\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r filter, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_filter(\n    skin_color == \"light\",\n    eye_color == \"brown\"\n  )\n\n# or\nstarwars |>\n  data_filter(\n    skin_color == \"light\" &\n      eye_color == \"brown\"\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  filter(\n    skin_color == \"light\",\n    eye_color == \"brown\"\n  )\n```\n:::\n\n::::\n\n```{r filter, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n<!-- Shorten output to make it easier to read: -->\n```{r, echo = FALSE, eval = evaluate_chunk}\nstarwars <- head(starwars)\n```\n\n## Selecting {#selecting}\n\n`data_select()` is the equivalent of `dplyr::select()`.\nThe main difference between these two functions is that `data_select()` uses two\narguments (`select` and `exclude`) and requires quoted column names if we want to\nselect several variables, while `dplyr::select()` accepts any unquoted column names.\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r select1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_select(select = c(\"hair_color\", \"skin_color\", \"eye_color\"))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  select(hair_color, skin_color, eye_color)\n```\n:::\n\n::::\n\n```{r select1, eval = evaluate_chunk, echo = FALSE}\n```\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r select2, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_select(select = 
-ends_with(\"color\"))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  select(-ends_with(\"color\"))\n```\n:::\n\n::::\n\n```{r select2, eval = evaluate_chunk, echo = FALSE}\n```\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n<!-- TODO: Although we say the column names need to be quoted, they are unquoted\nhere and quoting them won't work. Should we comment on that? -->\n\n```{r select3, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_select(select = -(hair_color:eye_color))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  select(!(hair_color:eye_color))\n```\n:::\n\n::::\n\n```{r select3, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r select4, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_select(exclude = regex(\"color$\"))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  select(-matches(\"color$\"))\n```\n:::\n\n::::\n\n```{r select4, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r select5, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_select(select = is.numeric)\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  select(where(is.numeric))\n```\n:::\n\n::::\n\n```{r select5, eval = evaluate_chunk, echo = FALSE}\n```\n\nYou can find a list of all the select helpers with `?data_select`.\n\n\n## Modifying {#modifying}\n\n`data_modify()` is a wrapper around `base::transform()` but has several additional\nbenefits:\n\n* it allows us to 
use newly created variables in the following expressions;\n* it works with grouped data;\n* it preserves variable attributes such as labels;\n* it accepts expressions as character vectors so that it is easy to program with it\n\n\nThis last point is also the main difference between `data_modify()` and\n`dplyr::mutate()`.\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r modify1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nefc |>\n  data_modify(\n    c12hour_c = center(c12hour),\n    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n    c12hour_z2 = standardize(c12hour)\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nefc |>\n  mutate(\n    c12hour_c = center(c12hour),\n    c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE),\n    c12hour_z2 = standardize(c12hour)\n  )\n```\n:::\n\n::::\n\n```{r modify1, eval = evaluate_chunk, echo = FALSE}\n```\n\n`data_modify()` supports expressions as strings via its `as_expr()` helper function.\n\n```{r eval=evaluate_chunk}\nnew_exp <- c(\n  \"c12hour_c = center(c12hour)\",\n  \"c12hour_z = c12hour_c / sd(c12hour, na.rm = TRUE)\"\n)\ndata_modify(efc, as_expr(new_exp))\n```\n\nThis makes it easy to use it in custom functions:\n\n```{r eval=evaluate_chunk}\nmiles_to_km <- function(data, var) {\n  data_modify(\n    data,\n    as_expr(paste0(\"km = \", var, \"* 1.609344\"))\n  )\n}\n\ndistance <- data.frame(miles = c(1, 8, 233, 88, 9))\ndistance\n\nmiles_to_km(distance, \"miles\")\n```\n\n\n\n## Sorting {#sorting}\n\n`data_arrange()` is the equivalent of `dplyr::arrange()`. It takes two arguments:\na data frame, and a vector of column names used to sort the rows. 
Note that contrary\nto most other functions in `{datawizard}`, it is not possible to use select helpers\nsuch as `starts_with()` in `data_arrange()`.\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n:::{}\n```{r arrange1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_arrange(c(\"hair_color\", \"height\"))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  arrange(hair_color, height)\n```\n:::\n\n::::\n\n```{r arrange1, eval = evaluate_chunk, echo = FALSE}\n```\n\nYou can also sort variables in descending order by putting a `\"-\"` in front of\ntheir name, like below:\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n:::{}\n```{r arrange2, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_arrange(c(\"-hair_color\", \"-height\"))\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  arrange(desc(hair_color), -height)\n```\n:::\n\n::::\n\n```{r arrange2, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n## Extracting {#extracting}\n\nAlthough we mostly work on data frames, it is sometimes useful to extract a single\ncolumn as a vector. This can be done with `data_extract()`, which reproduces the\nbehavior of `dplyr::pull()`:\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n:::{}\n```{r extract1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_extract(gender)\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  pull(gender)\n```\n:::\n\n::::\n\n```{r extract1, eval = evaluate_chunk, echo = FALSE}\n```\n\nWe can also specify several variables in `select`. 
In this case, `data_extract()`\nis equivalent to `data_select()`:\n\n```{r eval = evaluate_chunk}\nstarwars |>\n  data_extract(select = contains(\"color\"))\n```\n\n\n\n\n## Renaming {#renaming}\n\n`data_rename()` is the equivalent of `dplyr::rename()` but the syntax between the\ntwo is different. While `dplyr::rename()` takes new-old pairs of column\nnames, `data_rename()` requires a vector of column names to rename, and then\na vector of new names for these columns that must be of the same length.\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r rename1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_rename(\n    select = c(\"sex\", \"hair_color\"),\n    replacement = c(\"Sex\", \"Hair Color\")\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  rename(\n    Sex = sex,\n    \"Hair Color\" = hair_color\n  )\n```\n:::\n\n::::\n\n```{r rename1, eval = evaluate_chunk, echo = FALSE}\n```\n\nThe way `data_rename()` is designed makes it easy to apply the same modifications\nto a vector of column names. For example, we can remove underscores and use\nTitleCase with the following code:\n\n```{r rename2}\nto_rename <- names(starwars)\n\nstarwars |>\n  data_rename(\n    select = to_rename,\n    replacement = tools::toTitleCase(gsub(\"_\", \" \", to_rename, fixed = TRUE))\n  )\n```\n\n```{r rename2, eval = evaluate_chunk, echo = FALSE}\n```\n\nIt is also possible to add a prefix or a suffix to all or a subset of variables\nwith `data_addprefix()` and `data_addsuffix()`. 
The argument `select` accepts\nall select helpers that we saw above with `data_select()`:\n\n```{r rename3}\nstarwars |>\n  data_addprefix(\n    pattern = \"OLD.\",\n    select = contains(\"color\")\n  ) |>\n  data_addsuffix(\n    pattern = \".NEW\",\n    select = -contains(\"color\")\n  )\n```\n\n```{r rename3, eval = evaluate_chunk, echo = FALSE}\n```\n\n## Relocating {#relocating}\n\nSometimes, we want to relocate one or a small subset of columns in the dataset.\nRather than typing many names in `data_select()`, we can use `data_relocate()`,\nwhich is the equivalent of `dplyr::relocate()`. Just like `data_select()`, we can\nspecify a list of variables we want to relocate with `select` and `exclude`.\nThen, the arguments `before` and `after`^[Note that we use `before` and `after`\nwhereas `dplyr::relocate()` uses `.before` and `.after`.] specify where the selected columns should\nbe relocated:\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r relocate1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nstarwars |>\n  data_relocate(sex:homeworld, before = \"height\")\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nstarwars |>\n  relocate(sex:homeworld, .before = height)\n```\n:::\n\n::::\n\n```{r relocate1, eval = evaluate_chunk, echo = FALSE}\n```\n\nIn addition to column names, `before` and `after` accept column indices. Finally,\none can use `before = -1` to relocate the selected columns just before the last\ncolumn, or `after = -1` to relocate them after the last column.\n\n```{r eval = evaluate_chunk}\n# ---------- datawizard -----------\nstarwars |>\n  data_relocate(sex:homeworld, after = -1)\n```\n\n\n## Reshaping {#reshaping}\n\n### Longer\n\nReshaping data from wide to long or from long to wide format can be done with\n`data_to_long()` and `data_to_wide()`. 
These functions were designed to match\n`tidyr::pivot_longer()` and `tidyr::pivot_wider()` arguments, so that the only\nthing to do is to change the function name. However, not all of\n`tidyr::pivot_longer()` and `tidyr::pivot_wider()` features are available yet.\n\nWe will use the `relig_income` dataset, as in the [`{tidyr}` vignette](https://tidyr.tidyverse.org/articles/pivot.html).\n\n```{r eval = evaluate_chunk}\nrelig_income\n```\n\n\nWe would like to reshape this dataset to have 3 columns: religion, count, and\nincome. The column \"religion\" doesn't need to change, so we exclude it with\n`-religion`. Then, each remaining column corresponds to an income category.\nTherefore, we want to move all these column names to a single column called\n\"income\". Finally, the values corresponding to each of these columns will be\nreshaped to be in a single new column, called \"count\".\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r pivot1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nrelig_income |>\n  data_to_long(\n    -religion,\n    names_to = \"income\",\n    values_to = \"count\"\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nrelig_income |>\n  pivot_longer(\n    !religion,\n    names_to = \"income\",\n    values_to = \"count\"\n  )\n```\n:::\n\n::::\n\n```{r pivot1, eval = evaluate_chunk, echo = FALSE}\n```\n\n\nTo explore a bit more the arguments of `data_to_long()`, we will use another\ndataset: the `billboard` dataset.\n```{r eval = evaluate_chunk}\nbillboard\n```\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r pivot2, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nbillboard |>\n  data_to_long(\n    cols = starts_with(\"wk\"),\n    names_to = \"week\",\n    values_to = \"rank\",\n    values_drop_na = TRUE\n  )\n```\n:::\n\n::: {}\n\n```{r, 
class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nbillboard |>\n  pivot_longer(\n    cols = starts_with(\"wk\"),\n    names_to = \"week\",\n    values_to = \"rank\",\n    values_drop_na = TRUE\n  )\n```\n:::\n\n::::\n\n```{r pivot2, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n### Wider\n\nOnce again, we use an example in the `{tidyr}` vignette to show how close `data_to_wide()`\nand `pivot_wider()` are:\n```{r eval = evaluate_chunk}\nfish_encounters\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r pivot3, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nfish_encounters |>\n  data_to_wide(\n    names_from = \"station\",\n    values_from = \"seen\"\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nfish_encounters |>\n  pivot_wider(\n    names_from = station,\n    values_from = seen\n  )\n```\n:::\n\n::::\n\n```{r pivot3, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n\n## Joining {#joining}\n\n<!-- explain a bit more the args of data_join -->\n\nIn `{datawizard}`, joining datasets is done with `data_join()` (or its alias\n`data_merge()`). Contrary to `{dplyr}`, this unique function takes care of all\ntypes of join, which are then specified inside the function with the argument\n`join` (by default, `join = \"left\"`).\n\nBelow, we show how to perform the four most common joins: full, left, right and\ninner. 
We will use the datasets `band_members` and `band_instruments` provided by `{dplyr}`:\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r eval = evaluate_chunk}\nband_members\n```\n:::\n\n::: {}\n\n```{r eval = evaluate_chunk}\nband_instruments\n```\n:::\n\n::::\n\n\n### Full join\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r join1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nband_members |>\n  data_join(band_instruments, join = \"full\")\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nband_members |>\n  full_join(band_instruments)\n```\n:::\n\n::::\n\n```{r join1, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n\n### Left and right joins\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r join2, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nband_members |>\n  data_join(band_instruments, join = \"left\")\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nband_members |>\n  left_join(band_instruments)\n```\n:::\n\n::::\n\n```{r join2, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r join3, class.source = \"datawizard\"}\n# ---------- datawizard -----------\nband_members |>\n  data_join(band_instruments, join = \"right\")\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nband_members |>\n  right_join(band_instruments)\n```\n:::\n\n::::\n\n```{r join3, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n\n### Inner join\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r join4, class.source = \"datawizard\"}\n# ---------- datawizard 
-----------\nband_members |>\n  data_join(band_instruments, join = \"inner\")\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nband_members |>\n  inner_join(band_instruments)\n```\n:::\n\n::::\n\n```{r join4, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n## Uniting {#uniting}\n\nUniting variables is useful, e.g., to create unique indices by combining several\nvariables or to gather years, months, and days into a single date. `data_unite()`\noffers an interface very close to `tidyr::unite()`:\n\n```{r eval=evaluate_chunk}\ntest <- data.frame(\n  year = 2002:2004,\n  month = c(\"02\", \"03\", \"09\"),\n  day = c(\"11\", \"22\", \"28\"),\n  stringsAsFactors = FALSE\n)\ntest\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r unite1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\ntest |>\n  data_unite(\n    new_column = \"date\",\n    select = c(\"year\", \"month\", \"day\"),\n    separator = \"-\"\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\ntest |>\n  unite(\n    col = \"date\",\n    year, month, day,\n    sep = \"-\"\n  )\n```\n:::\n\n::::\n\n```{r unite1, eval = evaluate_chunk, echo = FALSE}\n```\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r unite2, class.source = \"datawizard\"}\n# ---------- datawizard -----------\ntest |>\n  data_unite(\n    new_column = \"date\",\n    select = c(\"year\", \"month\", \"day\"),\n    separator = \"-\",\n    append = TRUE\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\ntest |>\n  unite(\n    col = \"date\",\n    year, month, day,\n    sep = \"-\",\n    remove = FALSE\n  )\n```\n:::\n\n::::\n\n```{r unite2, eval = evaluate_chunk, echo = FALSE}\n```\n\n\n## Separating {#separating}\n\nSeparating variables is the counterpart to 
uniting variables and is useful to split values into multiple columns, e.g. when splitting a date into values for years, months and days. `data_separate()` offers an interface very close to `tidyr::separate()`:\n\n```{r eval=evaluate_chunk}\ntest <- data.frame(\n  date_arrival = c(\"2002-02-11\", \"2003-03-22\", \"2004-09-28\"),\n  date_departure = c(\"2002-03-15\", \"2003-03-28\", \"2004-09-30\"),\n  stringsAsFactors = FALSE\n)\ntest\n```\n\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r separate1, class.source = \"datawizard\"}\n# ---------- datawizard -----------\ntest |>\n  data_separate(\n    select = \"date_arrival\",\n    new_columns = c(\"Year\", \"Month\", \"Day\")\n  )\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\ntest |>\n  separate(\n    date_arrival,\n    into = c(\"Year\", \"Month\", \"Day\")\n  )\n```\n:::\n\n::::\n\n```{r separate1, eval = evaluate_chunk, echo = FALSE}\n```\n\n\nUnlike `tidyr::separate()`, you can separate multiple columns in one step with `data_separate()`.\n\n```{r eval = evaluate_chunk}\ntest |>\n  data_separate(\n    new_columns = list(\n      date_arrival = c(\"Arr_Year\", \"Arr_Month\", \"Arr_Day\"),\n      date_departure = c(\"Dep_Year\", \"Dep_Month\", \"Dep_Day\")\n    )\n  )\n```\n\n\n# Other useful functions\n\n`{datawizard}` contains other functions that are not necessarily included in\n`{dplyr}` or `{tidyr}` or do not directly modify the data. 
Some of them are\ninspired from the package `janitor`.\n\n## Work with rownames\n\nWe can convert a column in rownames and move rownames to a new column with\n`rownames_as_column()` and `column_as_rownames()`:\n\n```{r eval = evaluate_chunk}\nmtcars <- head(mtcars)\nmtcars\n\nmtcars2 <- mtcars |>\n  rownames_as_column(var = \"model\")\n\nmtcars2\n\nmtcars2 |>\n  column_as_rownames(var = \"model\")\n```\n\n## Work with row ids\n\n`rowid_as_column()` is close but not identical to `tibble::rowid_to_column()`.\nThe main difference is when we use it with grouped data. While `tibble::rowid_to_column()`\nuses one distinct rowid for every row in the dataset, `rowid_as_column()` creates\none id for every row *in each group*. Therefore, two rows in different groups\ncan have the same row id.\n\nThis means that `rowid_as_column()` is closer to using `n()` in `mutate()`, like\nthe following:\n\n```{r eval=evaluate_chunk}\ntest <- data.frame(\n  group = c(\"A\", \"A\", \"B\", \"B\"),\n  value = c(3, 5, 8, 1),\n  stringsAsFactors = FALSE\n)\ntest\n\ntest |>\n  data_group(group) |>\n  tibble::rowid_to_column()\n\ntest |>\n  data_group(group) |>\n  rowid_as_column()\n\ntest |>\n  data_group(group) |>\n  mutate(id = seq_len(n()))\n```\n\n\n## Work with column names\n\nWhen dealing with messy data, it is sometimes useful to use a row as column\nnames, and vice versa. 
This can be done with `row_to_colnames()` and\n`colnames_to_row()`.\n\n```{r eval = evaluate_chunk}\nx <- data.frame(\n  X_1 = c(NA, \"Title\", 1:3),\n  X_2 = c(NA, \"Title2\", 4:6)\n)\nx\nx2 <- x |>\n  row_to_colnames(row = 2)\nx2\n\nx2 |>\n  colnames_to_row()\n```\n\n## Take a quick look at the data\n\n:::: {style=\"display: grid; grid-template-columns: 50% 50%; grid-column-gap: 10px;\"}\n\n::: {}\n\n```{r glimpse, class.source = \"datawizard\"}\n# ---------- datawizard -----------\ndata_peek(iris)\n```\n:::\n\n::: {}\n\n```{r, class.source = \"tidyverse\"}\n# ---------- tidyverse -----------\nglimpse(iris)\n```\n:::\n\n::::\n\n```{r glimpse, eval = evaluate_chunk, echo = FALSE}\n```\n"
  }
]